diff --git a/contrib/compiler-rt/lib/asan/asan_allocator.cc b/contrib/compiler-rt/lib/asan/asan_allocator.cc index de6613f56727..92963ddfc4da 100644 --- a/contrib/compiler-rt/lib/asan/asan_allocator.cc +++ b/contrib/compiler-rt/lib/asan/asan_allocator.cc @@ -21,7 +21,9 @@ #include "asan_report.h" #include "asan_stack.h" #include "asan_thread.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_list.h" @@ -799,11 +801,6 @@ void PrintInternalAllocatorStats() { instance.PrintStats(); } -void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, - AllocType alloc_type) { - return instance.Allocate(size, alignment, stack, alloc_type, true); -} - void asan_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) { instance.Deallocate(ptr, 0, stack, alloc_type); } @@ -814,16 +811,16 @@ void asan_sized_free(void *ptr, uptr size, BufferedStackTrace *stack, } void *asan_malloc(uptr size, BufferedStackTrace *stack) { - return instance.Allocate(size, 8, stack, FROM_MALLOC, true); + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true)); } void *asan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { - return instance.Calloc(nmemb, size, stack); + return SetErrnoOnNull(instance.Calloc(nmemb, size, stack)); } void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) { if (!p) - return instance.Allocate(size, 8, stack, FROM_MALLOC, true); + return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true)); if (size == 0) { if (flags()->allocator_frees_and_returns_null_on_realloc_zero) { instance.Deallocate(p, 0, stack, FROM_MALLOC); @@ -832,26 +829,41 @@ void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) { // Allocate a size of 1 if we shouldn't 
free() on Realloc to 0 size = 1; } - return instance.Reallocate(p, size, stack); + return SetErrnoOnNull(instance.Reallocate(p, size, stack)); } void *asan_valloc(uptr size, BufferedStackTrace *stack) { - return instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true); + return SetErrnoOnNull( + instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true)); } void *asan_pvalloc(uptr size, BufferedStackTrace *stack) { uptr PageSize = GetPageSizeCached(); - size = RoundUpTo(size, PageSize); - if (size == 0) { - // pvalloc(0) should allocate one page. - size = PageSize; + // pvalloc(0) should allocate one page. + size = size ? RoundUpTo(size, PageSize) : PageSize; + return SetErrnoOnNull( + instance.Allocate(size, PageSize, stack, FROM_MALLOC, true)); +} + +void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack, + AllocType alloc_type) { + if (UNLIKELY(!IsPowerOfTwo(alignment))) { + errno = errno_EINVAL; + return AsanAllocator::FailureHandler::OnBadRequest(); } - return instance.Allocate(size, PageSize, stack, FROM_MALLOC, true); + return SetErrnoOnNull( + instance.Allocate(size, alignment, stack, alloc_type, true)); } int asan_posix_memalign(void **memptr, uptr alignment, uptr size, BufferedStackTrace *stack) { + if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { + AsanAllocator::FailureHandler::OnBadRequest(); + return errno_EINVAL; + } void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true); + if (UNLIKELY(!ptr)) + return errno_ENOMEM; CHECK(IsAligned((uptr)ptr, alignment)); *memptr = ptr; return 0; diff --git a/contrib/compiler-rt/lib/asan/asan_interceptors.cc b/contrib/compiler-rt/lib/asan/asan_interceptors.cc index ed12a9ac9015..34ca22b8616e 100644 --- a/contrib/compiler-rt/lib/asan/asan_interceptors.cc +++ b/contrib/compiler-rt/lib/asan/asan_interceptors.cc @@ -178,6 +178,10 @@ void SetThreadName(const char *name) { } int OnExit() { + if (CAN_SANITIZE_LEAKS && common_flags()->detect_leaks && + 
__lsan::HasReportedLeaks()) { + return common_flags()->exitcode; + } // FIXME: ask frontend whether we need to return failure. return 0; } diff --git a/contrib/compiler-rt/lib/builtins/cpu_model.c b/contrib/compiler-rt/lib/builtins/cpu_model.c index c6b30eda0a77..83ea7a49faf7 100644 --- a/contrib/compiler-rt/lib/builtins/cpu_model.c +++ b/contrib/compiler-rt/lib/builtins/cpu_model.c @@ -190,8 +190,8 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" @@ -200,6 +200,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#else + return true; +#endif #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); @@ -211,35 +221,6 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, #else return true; #endif -#elif defined(__i386__) || defined(_M_IX86) -#if defined(__GNUC__) || defined(__clang__) - __asm__("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) - : "a"(value), "c"(subleaf)); - return false; -#elif defined(_MSC_VER) - __asm { - mov eax,value - mov ecx,subleaf - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - 
} - return false; -#else - return true; -#endif -#else - return true; -#endif } // Read control register 0 (XCR0). Used to detect features such as AVX. diff --git a/contrib/compiler-rt/lib/lsan/lsan_allocator.cc b/contrib/compiler-rt/lib/lsan/lsan_allocator.cc index 6514aea6f609..2df58b44f6b8 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_allocator.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_allocator.cc @@ -15,7 +15,9 @@ #include "lsan_allocator.h" #include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_stacktrace.h" @@ -86,6 +88,13 @@ void *Allocate(const StackTrace &stack, uptr size, uptr alignment, return p; } +static void *Calloc(uptr nmemb, uptr size, const StackTrace &stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) + return Allocator::FailureHandler::OnBadRequest(); + size *= nmemb; + return Allocate(stack, size, 1, true); +} + void Deallocate(void *p) { if (&__sanitizer_free_hook) __sanitizer_free_hook(p); RunFreeHooks(p); @@ -118,11 +127,15 @@ uptr GetMallocUsableSize(const void *p) { } void *lsan_memalign(uptr alignment, uptr size, const StackTrace &stack) { - return Allocate(stack, size, alignment, kAlwaysClearMemory); + if (UNLIKELY(!IsPowerOfTwo(alignment))) { + errno = errno_EINVAL; + return Allocator::FailureHandler::OnBadRequest(); + } + return SetErrnoOnNull(Allocate(stack, size, alignment, kAlwaysClearMemory)); } void *lsan_malloc(uptr size, const StackTrace &stack) { - return Allocate(stack, size, 1, kAlwaysClearMemory); + return SetErrnoOnNull(Allocate(stack, size, 1, kAlwaysClearMemory)); } void lsan_free(void *p) { @@ -130,20 +143,16 @@ void lsan_free(void *p) { } void *lsan_realloc(void *p, uptr size, const StackTrace &stack) { - 
return Reallocate(stack, p, size, 1); + return SetErrnoOnNull(Reallocate(stack, p, size, 1)); } void *lsan_calloc(uptr nmemb, uptr size, const StackTrace &stack) { - if (CheckForCallocOverflow(size, nmemb)) - return Allocator::FailureHandler::OnBadRequest(); - size *= nmemb; - return Allocate(stack, size, 1, true); + return SetErrnoOnNull(Calloc(nmemb, size, stack)); } void *lsan_valloc(uptr size, const StackTrace &stack) { - if (size == 0) - size = GetPageSizeCached(); - return Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory); + return SetErrnoOnNull( + Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory)); } uptr lsan_mz_size(const void *p) { diff --git a/contrib/compiler-rt/lib/lsan/lsan_common.cc b/contrib/compiler-rt/lib/lsan/lsan_common.cc index 4ffa91568cc8..c121e6a8fb24 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common.cc @@ -576,18 +576,16 @@ static bool CheckForLeaks() { return false; } +static bool has_reported_leaks = false; +bool HasReportedLeaks() { return has_reported_leaks; } + void DoLeakCheck() { BlockingMutexLock l(&global_mutex); static bool already_done; if (already_done) return; already_done = true; - bool have_leaks = CheckForLeaks(); - if (!have_leaks) { - return; - } - if (common_flags()->exitcode) { - Die(); - } + has_reported_leaks = CheckForLeaks(); + if (has_reported_leaks) HandleLeaks(); } static int DoRecoverableLeakCheck() { diff --git a/contrib/compiler-rt/lib/lsan/lsan_common.h b/contrib/compiler-rt/lib/lsan/lsan_common.h index d93ac1b10919..31bf3eb1df42 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common.h +++ b/contrib/compiler-rt/lib/lsan/lsan_common.h @@ -226,6 +226,12 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p); // Return the linker module, if valid for the platform. LoadedModule *GetLinker(); +// Return true if LSan has finished leak checking and reported leaks. +bool HasReportedLeaks(); + +// Run platform-specific leak handlers. 
+void HandleLeaks(); + // Wrapper for chunk metadata operations. class LsanMetadata { public: diff --git a/contrib/compiler-rt/lib/lsan/lsan_common_linux.cc b/contrib/compiler-rt/lib/lsan/lsan_common_linux.cc index c903be42d1e7..5042c7b3ada5 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common_linux.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common_linux.cc @@ -100,6 +100,13 @@ struct DoStopTheWorldParam { void *argument; }; +// While calling Die() here is undefined behavior and can potentially +// cause race conditions, it isn't possible to intercept exit on linux, +// so we have no choice but to call Die() from the atexit handler. +void HandleLeaks() { + if (common_flags()->exitcode) Die(); +} + static int DoStopTheWorldCallback(struct dl_phdr_info *info, size_t size, void *data) { DoStopTheWorldParam *param = reinterpret_cast(data); diff --git a/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc b/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc index f87c6b7e0425..ade94340ae81 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc @@ -164,6 +164,11 @@ void ProcessPlatformSpecificAllocations(Frontier *frontier) { } } +// On darwin, we can intercept _exit gracefully, and return a failing exit code +// if required at that point. Calling Die() here is undefined behavior and +// causes rare race conditions. 
+void HandleLeaks() {} + void DoStopTheWorld(StopTheWorldCallback callback, void *argument) { StopTheWorld(callback, argument); } diff --git a/contrib/compiler-rt/lib/lsan/lsan_interceptors.cc b/contrib/compiler-rt/lib/lsan/lsan_interceptors.cc index 7d514402ad4b..168868b012bc 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_interceptors.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_interceptors.cc @@ -352,6 +352,11 @@ INTERCEPTOR(int, pthread_join, void *th, void **ret) { return res; } +INTERCEPTOR(void, _exit, int status) { + if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode; + REAL(_exit)(status); +} + namespace __lsan { void InitializeInterceptors() { @@ -371,6 +376,7 @@ void InitializeInterceptors() { LSAN_MAYBE_INTERCEPT_MALLOPT; INTERCEPT_FUNCTION(pthread_create); INTERCEPT_FUNCTION(pthread_join); + INTERCEPT_FUNCTION(_exit); if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) { Report("LeakSanitizer: failed to create thread key.\n"); diff --git a/contrib/compiler-rt/lib/msan/msan.h b/contrib/compiler-rt/lib/msan/msan.h index 0709260eebe2..fa9c15b88bef 100644 --- a/contrib/compiler-rt/lib/msan/msan.h +++ b/contrib/compiler-rt/lib/msan/msan.h @@ -280,10 +280,18 @@ void InitializeInterceptors(); void MsanAllocatorInit(); void MsanAllocatorThreadFinish(); -void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size); -void *MsanReallocate(StackTrace *stack, void *oldp, uptr size, - uptr alignment, bool zeroise); void MsanDeallocate(StackTrace *stack, void *ptr); + +void *msan_malloc(uptr size, StackTrace *stack); +void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack); +void *msan_realloc(void *ptr, uptr size, StackTrace *stack); +void *msan_valloc(uptr size, StackTrace *stack); +void *msan_pvalloc(uptr size, StackTrace *stack); +void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack); +void *msan_memalign(uptr alignment, uptr size, StackTrace *stack); +int msan_posix_memalign(void **memptr, uptr alignment, uptr 
size, + StackTrace *stack); + void InstallTrapHandler(); void InstallAtExitHandler(); diff --git a/contrib/compiler-rt/lib/msan/msan_allocator.cc b/contrib/compiler-rt/lib/msan/msan_allocator.cc index a92b7fd12f92..1034dbdf9b55 100644 --- a/contrib/compiler-rt/lib/msan/msan_allocator.cc +++ b/contrib/compiler-rt/lib/msan/msan_allocator.cc @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_errno.h" #include "msan.h" #include "msan_allocator.h" #include "msan_origin.h" @@ -194,20 +196,8 @@ void MsanDeallocate(StackTrace *stack, void *p) { } } -void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { - if (CheckForCallocOverflow(size, nmemb)) - return Allocator::FailureHandler::OnBadRequest(); - return MsanReallocate(stack, nullptr, nmemb * size, sizeof(u64), true); -} - void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, - uptr alignment, bool zeroise) { - if (!old_p) - return MsanAllocate(stack, new_size, alignment, zeroise); - if (!new_size) { - MsanDeallocate(stack, old_p); - return nullptr; - } + uptr alignment) { Metadata *meta = reinterpret_cast(allocator.GetMetaData(old_p)); uptr old_size = meta->requested_size; uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p); @@ -215,10 +205,7 @@ void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, // We are not reallocating here. 
meta->requested_size = new_size; if (new_size > old_size) { - if (zeroise) { - __msan_clear_and_unpoison((char *)old_p + old_size, - new_size - old_size); - } else if (flags()->poison_in_malloc) { + if (flags()->poison_in_malloc) { stack->tag = StackTrace::TAG_ALLOC; PoisonMemory((char *)old_p + old_size, new_size - old_size, stack); } @@ -226,8 +213,7 @@ void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, return old_p; } uptr memcpy_size = Min(new_size, old_size); - void *new_p = MsanAllocate(stack, new_size, alignment, zeroise); - // Printf("realloc: old_size %zd new_size %zd\n", old_size, new_size); + void *new_p = MsanAllocate(stack, new_size, alignment, false /*zeroise*/); if (new_p) { CopyMemory(new_p, old_p, memcpy_size, stack); MsanDeallocate(stack, old_p); @@ -243,6 +229,67 @@ static uptr AllocationSize(const void *p) { return b->requested_size; } +void *msan_malloc(uptr size, StackTrace *stack) { + return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); +} + +void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack) { + if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) + return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest()); + return SetErrnoOnNull(MsanAllocate(stack, nmemb * size, sizeof(u64), true)); +} + +void *msan_realloc(void *ptr, uptr size, StackTrace *stack) { + if (!ptr) + return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); + if (size == 0) { + MsanDeallocate(stack, ptr); + return nullptr; + } + return SetErrnoOnNull(MsanReallocate(stack, ptr, size, sizeof(u64))); +} + +void *msan_valloc(uptr size, StackTrace *stack) { + return SetErrnoOnNull(MsanAllocate(stack, size, GetPageSizeCached(), false)); +} + +void *msan_pvalloc(uptr size, StackTrace *stack) { + uptr PageSize = GetPageSizeCached(); + // pvalloc(0) should allocate one page. + size = size == 0 ? 
PageSize : RoundUpTo(size, PageSize); + return SetErrnoOnNull(MsanAllocate(stack, size, PageSize, false)); +} + +void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) { + if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { + errno = errno_EINVAL; + return Allocator::FailureHandler::OnBadRequest(); + } + return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); +} + +void *msan_memalign(uptr alignment, uptr size, StackTrace *stack) { + if (UNLIKELY(!IsPowerOfTwo(alignment))) { + errno = errno_EINVAL; + return Allocator::FailureHandler::OnBadRequest(); + } + return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); +} + +int msan_posix_memalign(void **memptr, uptr alignment, uptr size, + StackTrace *stack) { + if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { + Allocator::FailureHandler::OnBadRequest(); + return errno_EINVAL; + } + void *ptr = MsanAllocate(stack, size, alignment, false); + if (UNLIKELY(!ptr)) + return errno_ENOMEM; + CHECK(IsAligned((uptr)ptr, alignment)); + *memptr = ptr; + return 0; +} + } // namespace __msan using namespace __msan; diff --git a/contrib/compiler-rt/lib/msan/msan_interceptors.cc b/contrib/compiler-rt/lib/msan/msan_interceptors.cc index 069777c7f5e7..b5d22baca08d 100644 --- a/contrib/compiler-rt/lib/msan/msan_interceptors.cc +++ b/contrib/compiler-rt/lib/msan/msan_interceptors.cc @@ -161,58 +161,45 @@ INTERCEPTOR(void *, bcopy, const void *src, void *dest, SIZE_T n) { INTERCEPTOR(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size) { GET_MALLOC_STACK_TRACE; - CHECK_EQ(alignment & (alignment - 1), 0); CHECK_NE(memptr, 0); - *memptr = MsanReallocate(&stack, nullptr, size, alignment, false); - CHECK_NE(*memptr, 0); - __msan_unpoison(memptr, sizeof(*memptr)); - return 0; + int res = msan_posix_memalign(memptr, alignment, size, &stack); + if (!res) + __msan_unpoison(memptr, sizeof(*memptr)); + return res; } #if !SANITIZER_FREEBSD -INTERCEPTOR(void *, memalign, 
SIZE_T boundary, SIZE_T size) { +INTERCEPTOR(void *, memalign, SIZE_T alignment, SIZE_T size) { GET_MALLOC_STACK_TRACE; - CHECK_EQ(boundary & (boundary - 1), 0); - void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false); - return ptr; + return msan_memalign(alignment, size, &stack); } #define MSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign) #else #define MSAN_MAYBE_INTERCEPT_MEMALIGN #endif -INTERCEPTOR(void *, aligned_alloc, SIZE_T boundary, SIZE_T size) { +INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) { GET_MALLOC_STACK_TRACE; - CHECK_EQ(boundary & (boundary - 1), 0); - void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false); - return ptr; + return msan_aligned_alloc(alignment, size, &stack); } -INTERCEPTOR(void *, __libc_memalign, SIZE_T boundary, SIZE_T size) { +INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) { GET_MALLOC_STACK_TRACE; - CHECK_EQ(boundary & (boundary - 1), 0); - void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false); - DTLS_on_libc_memalign(ptr, size); + void *ptr = msan_memalign(alignment, size, &stack); + if (ptr) + DTLS_on_libc_memalign(ptr, size); return ptr; } INTERCEPTOR(void *, valloc, SIZE_T size) { GET_MALLOC_STACK_TRACE; - void *ptr = MsanReallocate(&stack, nullptr, size, GetPageSizeCached(), false); - return ptr; + return msan_valloc(size, &stack); } #if !SANITIZER_FREEBSD INTERCEPTOR(void *, pvalloc, SIZE_T size) { GET_MALLOC_STACK_TRACE; - uptr PageSize = GetPageSizeCached(); - size = RoundUpTo(size, PageSize); - if (size == 0) { - // pvalloc(0) should allocate one page. 
- size = PageSize; - } - void *ptr = MsanReallocate(&stack, nullptr, size, PageSize, false); - return ptr; + return msan_pvalloc(size, &stack); } #define MSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc) #else @@ -853,7 +840,7 @@ INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) { if (UNLIKELY(!msan_inited)) // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym. return AllocateFromLocalPool(nmemb * size); - return MsanCalloc(&stack, nmemb, size); + return msan_calloc(nmemb, size, &stack); } INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) { @@ -866,12 +853,12 @@ INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) { new_ptr = AllocateFromLocalPool(copy_size); } else { copy_size = size; - new_ptr = MsanReallocate(&stack, nullptr, copy_size, sizeof(u64), false); + new_ptr = msan_malloc(copy_size, &stack); } internal_memcpy(new_ptr, ptr, copy_size); return new_ptr; } - return MsanReallocate(&stack, ptr, size, sizeof(u64), false); + return msan_realloc(ptr, size, &stack); } INTERCEPTOR(void *, malloc, SIZE_T size) { @@ -879,7 +866,7 @@ INTERCEPTOR(void *, malloc, SIZE_T size) { if (UNLIKELY(!msan_inited)) // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym. return AllocateFromLocalPool(size); - return MsanReallocate(&stack, nullptr, size, sizeof(u64), false); + return msan_malloc(size, &stack); } void __msan_allocated_memory(const void *data, uptr size) { diff --git a/contrib/compiler-rt/lib/msan/msan_new_delete.cc b/contrib/compiler-rt/lib/msan/msan_new_delete.cc index c7295feebfe4..721926791029 100644 --- a/contrib/compiler-rt/lib/msan/msan_new_delete.cc +++ b/contrib/compiler-rt/lib/msan/msan_new_delete.cc @@ -31,7 +31,7 @@ namespace std { // TODO(alekseys): throw std::bad_alloc instead of dying on OOM. 
#define OPERATOR_NEW_BODY(nothrow) \ GET_MALLOC_STACK_TRACE; \ - void *res = MsanReallocate(&stack, 0, size, sizeof(u64), false);\ + void *res = msan_malloc(size, &stack);\ if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\ return res diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cc index 2f8f6e3f9aa7..84f523c5e431 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cc @@ -14,6 +14,7 @@ #include "sanitizer_allocator.h" +#include "sanitizer_allocator_checks.h" #include "sanitizer_allocator_internal.h" #include "sanitizer_atomic.h" #include "sanitizer_common.h" @@ -160,7 +161,7 @@ void *InternalRealloc(void *addr, uptr size, InternalAllocatorCache *cache) { } void *InternalCalloc(uptr count, uptr size, InternalAllocatorCache *cache) { - if (CheckForCallocOverflow(count, size)) + if (UNLIKELY(CheckForCallocOverflow(count, size))) return InternalAllocator::FailureHandler::OnBadRequest(); void *p = InternalAlloc(count * size, cache); if (p) internal_memset(p, 0, count * size); @@ -202,12 +203,6 @@ void SetLowLevelAllocateCallback(LowLevelAllocateCallback callback) { low_level_alloc_callback = callback; } -bool CheckForCallocOverflow(uptr size, uptr n) { - if (!size) return false; - uptr max = (uptr)-1L; - return (max / size) < n; -} - static atomic_uint8_t allocator_out_of_memory = {0}; static atomic_uint8_t allocator_may_return_null = {0}; diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h index 0fb8a087ed6b..8c5696ea789c 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h @@ -56,11 +56,6 @@ struct NoOpMapUnmapCallback { // Callback type for iterating over chunks. 
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg); -// Returns true if calloc(size, n) call overflows on size*n calculation. -// The caller should "return POLICY::OnBadRequest();" where POLICY is the -// current allocator failure handling policy. -bool CheckForCallocOverflow(uptr size, uptr n); - #include "sanitizer_allocator_size_class_map.h" #include "sanitizer_allocator_stats.h" #include "sanitizer_allocator_primary64.h" diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h new file mode 100644 index 000000000000..202916eae348 --- /dev/null +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_checks.h @@ -0,0 +1,64 @@ +//===-- sanitizer_allocator_checks.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Various checks shared between ThreadSanitizer, MemorySanitizer, etc. memory +// allocators. +// +//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_ALLOCATOR_CHECKS_H +#define SANITIZER_ALLOCATOR_CHECKS_H + +#include "sanitizer_errno.h" +#include "sanitizer_internal_defs.h" +#include "sanitizer_common.h" +#include "sanitizer_platform.h" + +namespace __sanitizer { + +// A common errno setting logic shared by almost all sanitizer allocator APIs. +INLINE void *SetErrnoOnNull(void *ptr) { + if (UNLIKELY(!ptr)) + errno = errno_ENOMEM; + return ptr; +} + +// In case of the check failure, the caller of the following Check... functions +// should "return POLICY::OnBadRequest();" where POLICY is the current allocator +// failure handling policy. 
+ +// Checks aligned_alloc() parameters, verifies that the alignment is a power of +// two and that the size is a multiple of alignment for POSIX implementation, +// and a bit relaxed requirement for non-POSIX ones, that the size is a multiple +// of alignment. +INLINE bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) { +#if SANITIZER_POSIX + return IsPowerOfTwo(alignment) && (size & (alignment - 1)) == 0; +#else + return size % alignment == 0; +#endif +} + +// Checks posix_memalign() parameters, verifies that alignment is a power of two +// and a multiple of sizeof(void *). +INLINE bool CheckPosixMemalignAlignment(uptr alignment) { + return IsPowerOfTwo(alignment) && (alignment % sizeof(void *)) == 0; // NOLINT +} + +// Returns true if calloc(size, n) call overflows on size*n calculation. +INLINE bool CheckForCallocOverflow(uptr size, uptr n) { + if (!size) + return false; + uptr max = (uptr)-1L; + return (max / size) < n; +} + +} // namespace __sanitizer + +#endif // SANITIZER_ALLOCATOR_CHECKS_H diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_errno.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_errno.h index c405307ba8ec..7872b89c227c 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_errno.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_errno.h @@ -26,6 +26,8 @@ # define __errno_location __error #elif SANITIZER_ANDROID # define __errno_location __errno +#elif SANITIZER_WINDOWS +# define __errno_location _errno #endif extern "C" int *__errno_location(); diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc index a79a2a155db9..8c3c1e5d6a5d 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc @@ -629,8 +629,7 @@ uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) { } #endif -uptr internal_sigaltstack(const struct 
sigaltstack *ss, - struct sigaltstack *oss) { +uptr internal_sigaltstack(const void *ss, void *oss) { return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss); } diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index ee336f7ddff3..11cad6b80933 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -21,7 +21,6 @@ #include "sanitizer_platform_limits_posix.h" struct link_map; // Opaque type returned by dlopen(). -struct sigaltstack; namespace __sanitizer { // Dirent structure for getdents(). Note that this structure is different from @@ -30,8 +29,7 @@ struct linux_dirent; // Syscall wrappers. uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count); -uptr internal_sigaltstack(const struct sigaltstack* ss, - struct sigaltstack* oss); +uptr internal_sigaltstack(const void* ss, void* oss); uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset); diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc index 8df01815f9f7..1edd4157fd6b 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc @@ -805,14 +805,35 @@ char **GetArgv() { // fields only available in 10.12+. Declare the struct manually to be able to // build against older SDKs. struct __sanitizer_task_vm_info { - uptr _unused[(SANITIZER_WORDSIZE == 32) ? 
20 : 19]; - uptr min_address; - uptr max_address; + mach_vm_size_t virtual_size; + integer_t region_count; + integer_t page_size; + mach_vm_size_t resident_size; + mach_vm_size_t resident_size_peak; + mach_vm_size_t device; + mach_vm_size_t device_peak; + mach_vm_size_t internal; + mach_vm_size_t internal_peak; + mach_vm_size_t external; + mach_vm_size_t external_peak; + mach_vm_size_t reusable; + mach_vm_size_t reusable_peak; + mach_vm_size_t purgeable_volatile_pmap; + mach_vm_size_t purgeable_volatile_resident; + mach_vm_size_t purgeable_volatile_virtual; + mach_vm_size_t compressed; + mach_vm_size_t compressed_peak; + mach_vm_size_t compressed_lifetime; + mach_vm_size_t phys_footprint; + mach_vm_address_t min_address; + mach_vm_address_t max_address; }; +#define __SANITIZER_TASK_VM_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(__sanitizer_task_vm_info) / sizeof(natural_t))) uptr GetTaskInfoMaxAddress() { - __sanitizer_task_vm_info vm_info = {{0}, 0, 0}; - mach_msg_type_number_t count = sizeof(vm_info) / sizeof(int); + __sanitizer_task_vm_info vm_info = {}; + mach_msg_type_number_t count = __SANITIZER_TASK_VM_INFO_COUNT; int err = task_info(mach_task_self(), TASK_VM_INFO, (int *)&vm_info, &count); if (err == 0) { return vm_info.max_address - 1; diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 49732aa32323..396f7c9346d6 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -13,7 +13,7 @@ #ifndef SANITIZER_PLATFORM_H #define SANITIZER_PLATFORM_H -#if !defined(__linux__) && !defined(__FreeBSD__) && \ +#if !defined(__linux__) && !defined(__FreeBSD__) && !defined(__NetBSD__) && \ !defined(__APPLE__) && !defined(_WIN32) # error "This operating system is not supported" #endif @@ -30,6 +30,12 @@ # define SANITIZER_FREEBSD 0 #endif +#if defined(__NetBSD__) +# define SANITIZER_NETBSD 1 
+#else +# define SANITIZER_NETBSD 0 +#endif + #if defined(__APPLE__) # define SANITIZER_MAC 1 # include @@ -79,7 +85,8 @@ # define SANITIZER_ANDROID 0 #endif -#define SANITIZER_POSIX (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC) +#define SANITIZER_POSIX \ + (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || SANITIZER_NETBSD) #if __LP64__ || defined(_WIN64) # define SANITIZER_WORDSIZE 64 diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 1bc43e817230..0380cee92a00 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -49,6 +49,12 @@ # define SI_FREEBSD 0 #endif +#if SANITIZER_NETBSD +# define SI_NETBSD 1 +#else +# define SI_NETBSD 0 +#endif + #if SANITIZER_LINUX # define SI_LINUX 1 #else @@ -109,9 +115,9 @@ // memmem on Darwin doesn't exist on 10.6 // FIXME: enable memmem on Windows. 
#define SANITIZER_INTERCEPT_MEMMEM \ - SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7 + (SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7) #define SANITIZER_INTERCEPT_MEMCHR 1 -#define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX +#define SANITIZER_INTERCEPT_MEMRCHR (SI_FREEBSD || SI_LINUX || SI_NETBSD) #define SANITIZER_INTERCEPT_READ SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PREAD SI_NOT_WINDOWS @@ -127,7 +133,8 @@ #define SANITIZER_INTERCEPT_READV SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_WRITEV SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_PREADV SI_FREEBSD || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_PREADV \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_PWRITEV SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PREADV64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PWRITEV64 SI_LINUX_NOT_ANDROID @@ -142,7 +149,7 @@ #ifndef SANITIZER_INTERCEPT_PRINTF # define SANITIZER_INTERCEPT_PRINTF SI_NOT_WINDOWS -# define SANITIZER_INTERCEPT_PRINTF_L SI_FREEBSD +# define SANITIZER_INTERCEPT_PRINTF_L (SI_FREEBSD || SI_NETBSD) # define SANITIZER_INTERCEPT_ISOC99_PRINTF SI_LINUX_NOT_ANDROID #endif @@ -151,13 +158,14 @@ #define SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_GETPWENT \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_GETPWENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_SETPWENT SI_MAC || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_CLOCK_GETTIME SI_FREEBSD || SI_LINUX +#define SANITIZER_INTERCEPT_GETPWENT_R \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_SETPWENT (SI_MAC || SI_LINUX_NOT_ANDROID) +#define 
SANITIZER_INTERCEPT_CLOCK_GETTIME (SI_FREEBSD || SI_NETBSD || SI_LINUX) #define SANITIZER_INTERCEPT_GETITIMER SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_TIME SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID @@ -168,10 +176,11 @@ #define SANITIZER_INTERCEPT_GETNAMEINFO SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_GETSOCKNAME SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_GETHOSTBYNAME SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R SI_FREEBSD || SI_LINUX -#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R SI_FREEBSD || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R SI_FREEBSD || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_GETHOSTENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R (SI_FREEBSD || SI_LINUX) +#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R \ + (SI_FREEBSD || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_GETHOSTENT_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_GETSOCKOPT SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_ACCEPT SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX_NOT_ANDROID @@ -197,63 +206,67 @@ #define SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_STRTOIMAX SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_MBSTOWCS SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_MBSNRTOWCS SI_MAC || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_MBSNRTOWCS (SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_WCSTOMBS SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_WCSNRTOMBS \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_WCRTOMB \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_TCGETATTR SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_REALPATH 
SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_CONFSTR \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_SCHED_GETAFFINITY SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_SCHED_GETPARAM SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_STRERROR SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_STRERROR_R SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_XPG_STRERROR_R SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_SCANDIR \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_SCANDIR64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_GETGROUPS SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_POLL SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PPOLL SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_WORDEXP \ - SI_FREEBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_SIGWAIT SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_SIGWAITINFO SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_SIGSETOPS \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_SIGPENDING SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_SIGPROCMASK SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_BACKTRACE SI_FREEBSD || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_BACKTRACE \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_GETMNTENT SI_LINUX #define SANITIZER_INTERCEPT_GETMNTENT_R SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_STATFS SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_STATFS \ + (SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_STATFS64 \ - (SI_MAC && !SI_IOS) || 
SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_STATVFS SI_FREEBSD || SI_LINUX_NOT_ANDROID + ((SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_STATVFS \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_STATVFS64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_INITGROUPS SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_ETHER_NTOA_ATON SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_ETHER_HOST \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_ETHER_R SI_FREEBSD || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_SHMCTL \ - ((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && SANITIZER_WORDSIZE == 64) + (SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_ETHER_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_SHMCTL \ + (SI_NETBSD || ((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && \ + SANITIZER_WORDSIZE == 64)) // NOLINT #define SANITIZER_INTERCEPT_RANDOM_R SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL \ - SI_MAC || SI_LINUX_NOT_ANDROID + (SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING \ - SI_MAC || SI_LINUX_NOT_ANDROID + (SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED SI_NOT_WINDOWS @@ -268,33 +281,36 @@ #define 
SANITIZER_INTERCEPT_SINCOS SI_LINUX #define SANITIZER_INTERCEPT_REMQUO SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_LGAMMA SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_LGAMMA_R SI_FREEBSD || SI_LINUX +#define SANITIZER_INTERCEPT_LGAMMA_R (SI_FREEBSD || SI_LINUX) #define SANITIZER_INTERCEPT_LGAMMAL_R SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_DRAND48_R SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_RAND_R \ - SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_ICONV SI_FREEBSD || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID) +#define SANITIZER_INTERCEPT_ICONV \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_TIMES SI_NOT_WINDOWS // FIXME: getline seems to be available on OSX 10.7 -#define SANITIZER_INTERCEPT_GETLINE SI_FREEBSD || SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_GETLINE \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) -#define SANITIZER_INTERCEPT__EXIT SI_LINUX || SI_FREEBSD || SI_MAC +#define SANITIZER_INTERCEPT__EXIT \ + (SI_LINUX || SI_FREEBSD || SI_NETBSD || SI_MAC) #define SANITIZER_INTERCEPT_PHTREAD_MUTEX SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_TLS_GET_ADDR \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_LISTXATTR SI_LINUX #define SANITIZER_INTERCEPT_GETXATTR SI_LINUX #define SANITIZER_INTERCEPT_GETRESID SI_LINUX #define SANITIZER_INTERCEPT_GETIFADDRS \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC) #define SANITIZER_INTERCEPT_IF_INDEXTONAME \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC) #define SANITIZER_INTERCEPT_CAPGET SI_LINUX_NOT_ANDROID #if SI_LINUX && defined(__arm__) #define SANITIZER_INTERCEPT_AEABI_MEM 
1 @@ -302,55 +318,61 @@ #define SANITIZER_INTERCEPT_AEABI_MEM 0 #endif #define SANITIZER_INTERCEPT___BZERO SI_MAC -#define SANITIZER_INTERCEPT_FTIME !SI_FREEBSD && SI_NOT_WINDOWS +#define SANITIZER_INTERCEPT_FTIME (!SI_FREEBSD && !SI_NETBSD && SI_NOT_WINDOWS) #define SANITIZER_INTERCEPT_XDR SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_TSEARCH SI_LINUX_NOT_ANDROID || SI_MAC +#define SANITIZER_INTERCEPT_TSEARCH \ + (SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD) #define SANITIZER_INTERCEPT_LIBIO_INTERNALS SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_FOPEN SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_FOPEN64 SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM (SI_LINUX_NOT_ANDROID || SI_NETBSD) #define SANITIZER_INTERCEPT_OBSTACK SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_FFLUSH SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_FCLOSE SI_NOT_WINDOWS #ifndef SANITIZER_INTERCEPT_DLOPEN_DLCLOSE #define SANITIZER_INTERCEPT_DLOPEN_DLCLOSE \ - SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC) #endif -#define SANITIZER_INTERCEPT_GETPASS SI_LINUX_NOT_ANDROID || SI_MAC +#define SANITIZER_INTERCEPT_GETPASS \ + (SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD) #define SANITIZER_INTERCEPT_TIMERFD SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_MLOCKX SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_FOPENCOOKIE SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_SEM SI_LINUX || SI_FREEBSD +#define SANITIZER_INTERCEPT_SEM (SI_LINUX || SI_FREEBSD || SI_NETBSD) #define SANITIZER_INTERCEPT_PTHREAD_SETCANCEL SI_NOT_WINDOWS -#define SANITIZER_INTERCEPT_MINCORE SI_LINUX +#define SANITIZER_INTERCEPT_MINCORE (SI_LINUX || SI_NETBSD) #define SANITIZER_INTERCEPT_PROCESS_VM_READV SI_LINUX -#define SANITIZER_INTERCEPT_CTERMID SI_LINUX || SI_MAC || SI_FREEBSD -#define SANITIZER_INTERCEPT_CTERMID_R SI_MAC || SI_FREEBSD +#define SANITIZER_INTERCEPT_CTERMID \ + (SI_LINUX 
|| SI_MAC || SI_FREEBSD || SI_NETBSD) +#define SANITIZER_INTERCEPT_CTERMID_R (SI_MAC || SI_FREEBSD) -#define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX || SI_MAC || SI_WINDOWS +#define SANITIZER_INTERCEPTOR_HOOKS (SI_LINUX || SI_MAC || SI_WINDOWS) #define SANITIZER_INTERCEPT_RECV_RECVFROM SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_SEND_SENDTO SI_NOT_WINDOWS #define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX -#define SANITIZER_INTERCEPT_STAT (SI_FREEBSD || SI_MAC || SI_ANDROID) -#define SANITIZER_INTERCEPT___XSTAT !SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS +#define SANITIZER_INTERCEPT_STAT \ + (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD) +#define SANITIZER_INTERCEPT___XSTAT \ + (!SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS) #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID -#define SANITIZER_INTERCEPT_UTMP SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD -#define SANITIZER_INTERCEPT_UTMPX SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD +#define SANITIZER_INTERCEPT_UTMP (SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD) +#define SANITIZER_INTERCEPT_UTMPX (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD) #define SANITIZER_INTERCEPT_GETLOADAVG \ - SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD + (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD || SI_NETBSD) -#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (!SI_FREEBSD && !SI_MAC) -#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC) -#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC) -#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC) +#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO \ + (!SI_FREEBSD && !SI_MAC && !SI_NETBSD) +#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD) +#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC && !SI_NETBSD) +#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC && !SI_NETBSD) #define 
SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC) #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC) #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc index 03f73ae88308..d7fa5f6451d1 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc @@ -287,7 +287,7 @@ static int TracerThread(void* argument) { // Alternate stack for signal handling. InternalScopedBuffer handler_stack_memory(kHandlerStackSize); - struct sigaltstack handler_stack; + stack_t handler_stack; internal_memset(&handler_stack, 0, sizeof(handler_stack)); handler_stack.ss_sp = handler_stack_memory.data(); handler_stack.ss_size = kHandlerStackSize; diff --git a/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp b/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp index ec9132f90a4f..6f30ee987513 100644 --- a/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -19,10 +19,11 @@ #include "scudo_tls.h" #include "scudo_utils.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_quarantine.h" -#include #include namespace __scudo { @@ -73,7 +74,7 @@ struct ScudoChunk : UnpackedHeader { // beginning of the user data to the end of the backend allocated chunk. 
uptr getUsableSize(UnpackedHeader *Header) { uptr Size = - getBackendAllocator().GetActuallyAllocatedSize(getAllocBeg(Header), + getBackendAllocator().getActuallyAllocatedSize(getAllocBeg(Header), Header->FromPrimary); if (Size == 0) return 0; @@ -232,7 +233,10 @@ struct QuarantineCallback { } Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(&Header); - getBackendAllocator().Deallocate(Cache_, Ptr, Header.FromPrimary); + if (Header.FromPrimary) + getBackendAllocator().deallocatePrimary(Cache_, Ptr); + else + getBackendAllocator().deallocateSecondary(Ptr); } // Internal quarantine allocation and deallocation functions. We first check @@ -240,11 +244,11 @@ struct QuarantineCallback { // TODO(kostyak): figure out the best way to protect the batches. COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize); void *Allocate(uptr Size) { - return getBackendAllocator().Allocate(Cache_, Size, MinAlignment, true); + return getBackendAllocator().allocatePrimary(Cache_, Size); } void Deallocate(void *Ptr) { - getBackendAllocator().Deallocate(Cache_, Ptr, true); + getBackendAllocator().deallocatePrimary(Cache_, Ptr); } AllocatorCache *Cache_; @@ -277,6 +281,9 @@ struct ScudoAllocator { ScudoBackendAllocator BackendAllocator; ScudoQuarantine AllocatorQuarantine; + StaticSpinMutex GlobalPrngMutex; + ScudoPrng GlobalPrng; + // The fallback caches are used when the thread local caches have been // 'detroyed' on thread tear-down. They are protected by a Mutex as they can // be accessed by different threads. @@ -303,10 +310,10 @@ struct ScudoAllocator { // result, the maximum offset will be at most the maximum alignment for the // last size class minus the header size, in multiples of MinAlignment. 
UnpackedHeader Header = {}; - uptr MaxPrimaryAlignment = 1 << MostSignificantSetBitIndex( - SizeClassMap::kMaxSize - MinAlignment); - uptr MaxOffset = (MaxPrimaryAlignment - AlignedChunkHeaderSize) >> - MinAlignmentLog; + uptr MaxPrimaryAlignment = + 1 << MostSignificantSetBitIndex(SizeClassMap::kMaxSize - MinAlignment); + uptr MaxOffset = + (MaxPrimaryAlignment - AlignedChunkHeaderSize) >> MinAlignmentLog; Header.Offset = MaxOffset; if (Header.Offset != MaxOffset) { dieWithMessage("ERROR: the maximum possible offset doesn't fit in the " @@ -328,13 +335,14 @@ struct ScudoAllocator { DeleteSizeMismatch = Options.DeleteSizeMismatch; ZeroContents = Options.ZeroContents; SetAllocatorMayReturnNull(Options.MayReturnNull); - BackendAllocator.Init(Options.ReleaseToOSIntervalMs); + BackendAllocator.init(Options.ReleaseToOSIntervalMs); AllocatorQuarantine.Init( static_cast(Options.QuarantineSizeMb) << 20, static_cast(Options.ThreadLocalQuarantineSizeKb) << 10); - BackendAllocator.InitCache(&FallbackAllocatorCache); + GlobalPrng.init(); + Cookie = GlobalPrng.getU64(); + BackendAllocator.initCache(&FallbackAllocatorCache); FallbackPrng.init(); - Cookie = FallbackPrng.getU64(); } // Helper function that checks for a valid Scudo chunk. nullptr isn't. @@ -374,28 +382,36 @@ struct ScudoAllocator { void *Ptr; u8 Salt; - uptr AllocationSize = FromPrimary ? AlignedSize : NeededSize; - uptr AllocationAlignment = FromPrimary ? 
MinAlignment : Alignment; - ScudoThreadContext *ThreadContext = getThreadContextAndLock(); - if (LIKELY(ThreadContext)) { - Salt = getPrng(ThreadContext)->getU8(); - Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext), - AllocationSize, AllocationAlignment, - FromPrimary); - ThreadContext->unlock(); + uptr AllocSize; + if (FromPrimary) { + AllocSize = AlignedSize; + ScudoThreadContext *ThreadContext = getThreadContextAndLock(); + if (LIKELY(ThreadContext)) { + Salt = getPrng(ThreadContext)->getU8(); + Ptr = BackendAllocator.allocatePrimary(getAllocatorCache(ThreadContext), + AllocSize); + ThreadContext->unlock(); + } else { + SpinMutexLock l(&FallbackMutex); + Salt = FallbackPrng.getU8(); + Ptr = BackendAllocator.allocatePrimary(&FallbackAllocatorCache, + AllocSize); + } } else { - SpinMutexLock l(&FallbackMutex); - Salt = FallbackPrng.getU8(); - Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, AllocationSize, - AllocationAlignment, FromPrimary); + { + SpinMutexLock l(&GlobalPrngMutex); + Salt = GlobalPrng.getU8(); + } + AllocSize = NeededSize; + Ptr = BackendAllocator.allocateSecondary(AllocSize, Alignment); } if (UNLIKELY(!Ptr)) return FailureHandler::OnOOM(); // If requested, we will zero out the entire contents of the returned chunk. 
if ((ForceZeroContents || ZeroContents) && FromPrimary) - memset(Ptr, 0, - BackendAllocator.GetActuallyAllocatedSize(Ptr, FromPrimary)); + memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize( + Ptr, /*FromPrimary=*/true)); UnpackedHeader Header = {}; uptr AllocBeg = reinterpret_cast(Ptr); @@ -409,11 +425,11 @@ struct ScudoAllocator { uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; Header.Offset = Offset >> MinAlignmentLog; } - CHECK_LE(UserBeg + Size, AllocBeg + AllocationSize); + CHECK_LE(UserBeg + Size, AllocBeg + AllocSize); Header.State = ChunkAllocated; Header.AllocType = Type; if (FromPrimary) { - Header.FromPrimary = FromPrimary; + Header.FromPrimary = 1; Header.SizeOrUnusedBytes = Size; } else { // The secondary fits the allocations to a page, so the amount of unused @@ -424,7 +440,7 @@ struct ScudoAllocator { if (TrailingBytes) Header.SizeOrUnusedBytes = PageSize - TrailingBytes; } - Header.Salt = static_cast(Salt); + Header.Salt = Salt; getScudoChunk(UserBeg)->storeHeader(&Header); void *UserPtr = reinterpret_cast(UserBeg); // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size); @@ -442,15 +458,18 @@ struct ScudoAllocator { if (BypassQuarantine) { Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(Header); - ScudoThreadContext *ThreadContext = getThreadContextAndLock(); - if (LIKELY(ThreadContext)) { - getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr, - FromPrimary); - ThreadContext->unlock(); + if (FromPrimary) { + ScudoThreadContext *ThreadContext = getThreadContextAndLock(); + if (LIKELY(ThreadContext)) { + getBackendAllocator().deallocatePrimary( + getAllocatorCache(ThreadContext), Ptr); + ThreadContext->unlock(); + } else { + SpinMutexLock Lock(&FallbackMutex); + getBackendAllocator().deallocatePrimary(&FallbackAllocatorCache, Ptr); + } } else { - SpinMutexLock Lock(&FallbackMutex); - getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr, - FromPrimary); + 
getBackendAllocator().deallocateSecondary(Ptr); } } else { UnpackedHeader NewHeader = *Header; @@ -580,7 +599,7 @@ struct ScudoAllocator { void *calloc(uptr NMemB, uptr Size) { initThreadMaybe(); - if (CheckForCallocOverflow(NMemB, Size)) + if (UNLIKELY(CheckForCallocOverflow(NMemB, Size))) return FailureHandler::OnBadRequest(); return allocate(NMemB * Size, MinAlignment, FromMalloc, true); } @@ -589,13 +608,13 @@ struct ScudoAllocator { AllocatorCache *Cache = getAllocatorCache(ThreadContext); AllocatorQuarantine.Drain(getQuarantineCache(ThreadContext), QuarantineCallback(Cache)); - BackendAllocator.DestroyCache(Cache); + BackendAllocator.destroyCache(Cache); } uptr getStats(AllocatorStat StatType) { initThreadMaybe(); uptr stats[AllocatorStatCount]; - BackendAllocator.GetStats(stats); + BackendAllocator.getStats(stats); return stats[StatType]; } }; @@ -611,7 +630,7 @@ static void initScudoInternal(const AllocatorOptions &Options) { } void ScudoThreadContext::init() { - getBackendAllocator().InitCache(&Cache); + getBackendAllocator().initCache(&Cache); Prng.init(); memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder)); } @@ -621,7 +640,7 @@ void ScudoThreadContext::commitBack() { } void *scudoMalloc(uptr Size, AllocType Type) { - return Instance.allocate(Size, MinAlignment, Type); + return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, Type)); } void scudoFree(void *Ptr, AllocType Type) { @@ -634,54 +653,56 @@ void scudoSizedFree(void *Ptr, uptr Size, AllocType Type) { void *scudoRealloc(void *Ptr, uptr Size) { if (!Ptr) - return Instance.allocate(Size, MinAlignment, FromMalloc); + return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, FromMalloc)); if (Size == 0) { Instance.deallocate(Ptr, 0, FromMalloc); return nullptr; } - return Instance.reallocate(Ptr, Size); + return SetErrnoOnNull(Instance.reallocate(Ptr, Size)); } void *scudoCalloc(uptr NMemB, uptr Size) { - return Instance.calloc(NMemB, Size); + return 
SetErrnoOnNull(Instance.calloc(NMemB, Size)); } void *scudoValloc(uptr Size) { - return Instance.allocate(Size, GetPageSizeCached(), FromMemalign); + return SetErrnoOnNull( + Instance.allocate(Size, GetPageSizeCached(), FromMemalign)); } void *scudoPvalloc(uptr Size) { uptr PageSize = GetPageSizeCached(); - Size = RoundUpTo(Size, PageSize); - if (Size == 0) { - // pvalloc(0) should allocate one page. - Size = PageSize; - } - return Instance.allocate(Size, PageSize, FromMemalign); + // pvalloc(0) should allocate one page. + Size = Size ? RoundUpTo(Size, PageSize) : PageSize; + return SetErrnoOnNull(Instance.allocate(Size, PageSize, FromMemalign)); } void *scudoMemalign(uptr Alignment, uptr Size) { - if (UNLIKELY(!IsPowerOfTwo(Alignment))) + if (UNLIKELY(!IsPowerOfTwo(Alignment))) { + errno = errno_EINVAL; return ScudoAllocator::FailureHandler::OnBadRequest(); - return Instance.allocate(Size, Alignment, FromMemalign); + } + return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMemalign)); } int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size) { - if (UNLIKELY(!IsPowerOfTwo(Alignment) || (Alignment % sizeof(void *)) != 0)) { - *MemPtr = ScudoAllocator::FailureHandler::OnBadRequest(); - return EINVAL; + if (UNLIKELY(!CheckPosixMemalignAlignment(Alignment))) { + ScudoAllocator::FailureHandler::OnBadRequest(); + return errno_EINVAL; } - *MemPtr = Instance.allocate(Size, Alignment, FromMemalign); - if (!*MemPtr) - return ENOMEM; + void *Ptr = Instance.allocate(Size, Alignment, FromMemalign); + if (UNLIKELY(!Ptr)) + return errno_ENOMEM; + *MemPtr = Ptr; return 0; } void *scudoAlignedAlloc(uptr Alignment, uptr Size) { - // Alignment must be a power of 2, Size must be a multiple of Alignment. 
- if (UNLIKELY(!IsPowerOfTwo(Alignment) || (Size & (Alignment - 1)) != 0)) + if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(Alignment, Size))) { + errno = errno_EINVAL; return ScudoAllocator::FailureHandler::OnBadRequest(); - return Instance.allocate(Size, Alignment, FromMalloc); + } + return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMalloc)); } uptr scudoMallocUsableSize(void *Ptr) { diff --git a/contrib/compiler-rt/lib/scudo/scudo_allocator_combined.h b/contrib/compiler-rt/lib/scudo/scudo_allocator_combined.h index 818272868880..7599c12abb6d 100644 --- a/contrib/compiler-rt/lib/scudo/scudo_allocator_combined.h +++ b/contrib/compiler-rt/lib/scudo/scudo_allocator_combined.h @@ -23,41 +23,47 @@ template class ScudoCombinedAllocator { public: - void Init(s32 ReleaseToOSIntervalMs) { + void init(s32 ReleaseToOSIntervalMs) { Primary.Init(ReleaseToOSIntervalMs); Secondary.Init(); Stats.Init(); } - void *Allocate(AllocatorCache *Cache, uptr Size, uptr Alignment, - bool FromPrimary) { - if (FromPrimary) - return Cache->Allocate(&Primary, Primary.ClassID(Size)); + // Primary allocations are always MinAlignment aligned, and as such do not + // require an Alignment parameter. + void *allocatePrimary(AllocatorCache *Cache, uptr Size) { + return Cache->Allocate(&Primary, Primary.ClassID(Size)); + } + + // Secondary allocations do not require a Cache, but do require an Alignment + // parameter. 
+ void *allocateSecondary(uptr Size, uptr Alignment) { return Secondary.Allocate(&Stats, Size, Alignment); } - void Deallocate(AllocatorCache *Cache, void *Ptr, bool FromPrimary) { - if (FromPrimary) - Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr); - else - Secondary.Deallocate(&Stats, Ptr); + void deallocatePrimary(AllocatorCache *Cache, void *Ptr) { + Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr); } - uptr GetActuallyAllocatedSize(void *Ptr, bool FromPrimary) { + void deallocateSecondary(void *Ptr) { + Secondary.Deallocate(&Stats, Ptr); + } + + uptr getActuallyAllocatedSize(void *Ptr, bool FromPrimary) { if (FromPrimary) return PrimaryAllocator::ClassIdToSize(Primary.GetSizeClass(Ptr)); return Secondary.GetActuallyAllocatedSize(Ptr); } - void InitCache(AllocatorCache *Cache) { + void initCache(AllocatorCache *Cache) { Cache->Init(&Stats); } - void DestroyCache(AllocatorCache *Cache) { + void destroyCache(AllocatorCache *Cache) { Cache->Destroy(&Primary, &Stats); } - void GetStats(AllocatorStatCounters StatType) const { + void getStats(AllocatorStatCounters StatType) const { Stats.Get(StatType); } diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc index 9ee9104283f8..ef984a45cd9d 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc @@ -61,20 +61,13 @@ // an exclusive lock; ThreadClock's are private to respective threads and so // do not need any protection. // -// Description of ThreadClock state: -// clk_ - fixed size vector clock. -// nclk_ - effective size of the vector clock (the rest is zeros). -// tid_ - index of the thread associated with he clock ("current thread"). -// last_acquire_ - current thread time when it acquired something from -// other threads. 
-// // Description of SyncClock state: // clk_ - variable size vector clock, low kClkBits hold timestamp, // the remaining bits hold "acquired" flag (the actual value is thread's // reused counter); // if acquried == thr->reused_, then the respective thread has already -// acquired this clock (except possibly dirty_tids_). -// dirty_tids_ - holds up to two indeces in the vector clock that other threads +// acquired this clock (except possibly for dirty elements). +// dirty_ - holds up to two indeces in the vector clock that other threads // need to acquire regardless of "acquired" flag value; // release_store_tid_ - denotes that the clock state is a result of // release-store operation by the thread with release_store_tid_ index. @@ -90,21 +83,51 @@ namespace __tsan { +static atomic_uint32_t *ref_ptr(ClockBlock *cb) { + return reinterpret_cast(&cb->table[ClockBlock::kRefIdx]); +} + +// Drop reference to the first level block idx. +static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) { + ClockBlock *cb = ctx->clock_alloc.Map(idx); + atomic_uint32_t *ref = ref_ptr(cb); + u32 v = atomic_load(ref, memory_order_acquire); + for (;;) { + CHECK_GT(v, 0); + if (v == 1) + break; + if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel)) + return; + } + // First level block owns second level blocks, so them as well. 
+ for (uptr i = 0; i < blocks; i++) + ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]); + ctx->clock_alloc.Free(c, idx); +} + ThreadClock::ThreadClock(unsigned tid, unsigned reused) : tid_(tid) - , reused_(reused + 1) { // 0 has special meaning + , reused_(reused + 1) // 0 has special meaning + , cached_idx_() + , cached_size_() + , cached_blocks_() { CHECK_LT(tid, kMaxTidInClock); CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits); nclk_ = tid_ + 1; last_acquire_ = 0; internal_memset(clk_, 0, sizeof(clk_)); - clk_[tid_].reused = reused_; } void ThreadClock::ResetCached(ClockCache *c) { + if (cached_idx_) { + UnrefClockBlock(c, cached_idx_, cached_blocks_); + cached_idx_ = 0; + cached_size_ = 0; + cached_blocks_ = 0; + } } -void ThreadClock::acquire(ClockCache *c, const SyncClock *src) { +void ThreadClock::acquire(ClockCache *c, SyncClock *src) { DCHECK_LE(nclk_, kMaxTid); DCHECK_LE(src->size_, kMaxTid); CPP_STAT_INC(StatClockAcquire); @@ -116,50 +139,46 @@ void ThreadClock::acquire(ClockCache *c, const SyncClock *src) { return; } - // Check if we've already acquired src after the last release operation on src bool acquired = false; - if (nclk > tid_) { - if (src->elem(tid_).reused == reused_) { - for (unsigned i = 0; i < kDirtyTids; i++) { - unsigned tid = src->dirty_tids_[i]; - if (tid != kInvalidTid) { - u64 epoch = src->elem(tid).epoch; - if (clk_[tid].epoch < epoch) { - clk_[tid].epoch = epoch; - acquired = true; - } - } + for (unsigned i = 0; i < kDirtyTids; i++) { + SyncClock::Dirty dirty = src->dirty_[i]; + unsigned tid = dirty.tid; + if (tid != kInvalidTid) { + if (clk_[tid] < dirty.epoch) { + clk_[tid] = dirty.epoch; + acquired = true; } - if (acquired) { - CPP_STAT_INC(StatClockAcquiredSomething); - last_acquire_ = clk_[tid_].epoch; - } - return; } } - // O(N) acquire. 
- CPP_STAT_INC(StatClockAcquireFull); - nclk_ = max(nclk_, nclk); - for (uptr i = 0; i < nclk; i++) { - u64 epoch = src->elem(i).epoch; - if (clk_[i].epoch < epoch) { - clk_[i].epoch = epoch; - acquired = true; + // Check if we've already acquired src after the last release operation on src + if (tid_ >= nclk || src->elem(tid_).reused != reused_) { + // O(N) acquire. + CPP_STAT_INC(StatClockAcquireFull); + nclk_ = max(nclk_, nclk); + u64 *dst_pos = &clk_[0]; + for (ClockElem &src_elem : *src) { + u64 epoch = src_elem.epoch; + if (*dst_pos < epoch) { + *dst_pos = epoch; + acquired = true; + } + dst_pos++; } - } - // Remember that this thread has acquired this clock. - if (nclk > tid_) - src->elem(tid_).reused = reused_; + // Remember that this thread has acquired this clock. + if (nclk > tid_) + src->elem(tid_).reused = reused_; + } if (acquired) { CPP_STAT_INC(StatClockAcquiredSomething); - last_acquire_ = clk_[tid_].epoch; + last_acquire_ = clk_[tid_]; + ResetCached(c); } } -void ThreadClock::release(ClockCache *c, SyncClock *dst) const { +void ThreadClock::release(ClockCache *c, SyncClock *dst) { DCHECK_LE(nclk_, kMaxTid); DCHECK_LE(dst->size_, kMaxTid); @@ -179,7 +198,7 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const { // since the last release on dst. If so, we need to update // only dst->elem(tid_). if (dst->elem(tid_).epoch > last_acquire_) { - UpdateCurrentThread(dst); + UpdateCurrentThread(c, dst); if (dst->release_store_tid_ != tid_ || dst->release_store_reused_ != reused_) dst->release_store_tid_ = kInvalidTid; @@ -188,23 +207,24 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const { // O(N) release. CPP_STAT_INC(StatClockReleaseFull); + dst->Unshare(c); // First, remember whether we've acquired dst. bool acquired = IsAlreadyAcquired(dst); if (acquired) CPP_STAT_INC(StatClockReleaseAcquired); // Update dst->clk_. 
- for (uptr i = 0; i < nclk_; i++) { - ClockElem &ce = dst->elem(i); - ce.epoch = max(ce.epoch, clk_[i].epoch); + dst->FlushDirty(); + uptr i = 0; + for (ClockElem &ce : *dst) { + ce.epoch = max(ce.epoch, clk_[i]); ce.reused = 0; + i++; } // Clear 'acquired' flag in the remaining elements. if (nclk_ < dst->size_) CPP_STAT_INC(StatClockReleaseClearTail); for (uptr i = nclk_; i < dst->size_; i++) dst->elem(i).reused = 0; - for (unsigned i = 0; i < kDirtyTids; i++) - dst->dirty_tids_[i] = kInvalidTid; dst->release_store_tid_ = kInvalidTid; dst->release_store_reused_ = 0; // If we've acquired dst, remember this fact, @@ -213,11 +233,37 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const { dst->elem(tid_).reused = reused_; } -void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const { +void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) { DCHECK_LE(nclk_, kMaxTid); DCHECK_LE(dst->size_, kMaxTid); CPP_STAT_INC(StatClockStore); + if (dst->size_ == 0 && cached_idx_ != 0) { + // Reuse the cached clock. + // Note: we could reuse/cache the cached clock in more cases: + // we could update the existing clock and cache it, or replace it with the + // currently cached clock and release the old one. And for a shared + // existing clock, we could replace it with the currently cached; + // or unshare, update and cache. But, for simplicity, we currnetly reuse + // cached clock only when the target clock is empty. + dst->tab_ = ctx->clock_alloc.Map(cached_idx_); + dst->tab_idx_ = cached_idx_; + dst->size_ = cached_size_; + dst->blocks_ = cached_blocks_; + CHECK_EQ(dst->dirty_[0].tid, kInvalidTid); + // The cached clock is shared (immutable), + // so this is where we store the current clock. + dst->dirty_[0].tid = tid_; + dst->dirty_[0].epoch = clk_[tid_]; + dst->release_store_tid_ = tid_; + dst->release_store_reused_ = reused_; + // Rememeber that we don't need to acquire it in future. + dst->elem(tid_).reused = reused_; + // Grab a reference. 
+ atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); + return; + } + // Check if we need to resize dst. if (dst->size_ < nclk_) dst->Resize(c, nclk_); @@ -226,32 +272,41 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const { dst->release_store_reused_ == reused_ && dst->elem(tid_).epoch > last_acquire_) { CPP_STAT_INC(StatClockStoreFast); - UpdateCurrentThread(dst); + UpdateCurrentThread(c, dst); return; } // O(N) release-store. CPP_STAT_INC(StatClockStoreFull); - for (uptr i = 0; i < nclk_; i++) { - ClockElem &ce = dst->elem(i); - ce.epoch = clk_[i].epoch; + dst->Unshare(c); + // Note: dst can be larger than this ThreadClock. + // This is fine since clk_ beyond size is all zeros. + uptr i = 0; + for (ClockElem &ce : *dst) { + ce.epoch = clk_[i]; ce.reused = 0; + i++; } - // Clear the tail of dst->clk_. - if (nclk_ < dst->size_) { - for (uptr i = nclk_; i < dst->size_; i++) { - ClockElem &ce = dst->elem(i); - ce.epoch = 0; - ce.reused = 0; - } - CPP_STAT_INC(StatClockStoreTail); - } - for (unsigned i = 0; i < kDirtyTids; i++) - dst->dirty_tids_[i] = kInvalidTid; + for (uptr i = 0; i < kDirtyTids; i++) + dst->dirty_[i].tid = kInvalidTid; dst->release_store_tid_ = tid_; dst->release_store_reused_ = reused_; // Rememeber that we don't need to acquire it in future. dst->elem(tid_).reused = reused_; + + // If the resulting clock is cachable, cache it for future release operations. + // The clock is always cachable if we released to an empty sync object. + if (cached_idx_ == 0 && dst->Cachable()) { + // Grab a reference to the ClockBlock. 
+ atomic_uint32_t *ref = ref_ptr(dst->tab_); + if (atomic_load(ref, memory_order_acquire) == 1) + atomic_store_relaxed(ref, 2); + else + atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); + cached_idx_ = dst->tab_idx_; + cached_size_ = dst->size_; + cached_blocks_ = dst->blocks_; + } } void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) { @@ -261,37 +316,36 @@ void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) { } // Updates only single element related to the current thread in dst->clk_. -void ThreadClock::UpdateCurrentThread(SyncClock *dst) const { +void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const { // Update the threads time, but preserve 'acquired' flag. - dst->elem(tid_).epoch = clk_[tid_].epoch; - for (unsigned i = 0; i < kDirtyTids; i++) { - if (dst->dirty_tids_[i] == tid_) { + SyncClock::Dirty *dirty = &dst->dirty_[i]; + const unsigned tid = dirty->tid; + if (tid == tid_ || tid == kInvalidTid) { CPP_STAT_INC(StatClockReleaseFast); - return; - } - if (dst->dirty_tids_[i] == kInvalidTid) { - CPP_STAT_INC(StatClockReleaseFast); - dst->dirty_tids_[i] = tid_; + dirty->tid = tid_; + dirty->epoch = clk_[tid_]; return; } } // Reset all 'acquired' flags, O(N). + // We are going to touch dst elements, so we need to unshare it. + dst->Unshare(c); CPP_STAT_INC(StatClockReleaseSlow); + dst->elem(tid_).epoch = clk_[tid_]; for (uptr i = 0; i < dst->size_; i++) dst->elem(i).reused = 0; - for (unsigned i = 0; i < kDirtyTids; i++) - dst->dirty_tids_[i] = kInvalidTid; + dst->FlushDirty(); } -// Checks whether the current threads has already acquired src. +// Checks whether the current thread has already acquired src. 
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const { if (src->elem(tid_).reused != reused_) return false; for (unsigned i = 0; i < kDirtyTids; i++) { - unsigned tid = src->dirty_tids_[i]; - if (tid != kInvalidTid) { - if (clk_[tid].epoch < src->elem(tid).epoch) + SyncClock::Dirty dirty = src->dirty_[i]; + if (dirty.tid != kInvalidTid) { + if (clk_[dirty.tid] < dirty.epoch) return false; } } @@ -302,22 +356,19 @@ bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const { // This function is called only from weird places like AcquireGlobal. void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) { DCHECK_LT(tid, kMaxTid); - DCHECK_GE(v, clk_[tid].epoch); - clk_[tid].epoch = v; + DCHECK_GE(v, clk_[tid]); + clk_[tid] = v; if (nclk_ <= tid) nclk_ = tid + 1; - last_acquire_ = clk_[tid_].epoch; + last_acquire_ = clk_[tid_]; + ResetCached(c); } void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) { printf("clock=["); for (uptr i = 0; i < nclk_; i++) - printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch); - printf("] reused=["); - for (uptr i = 0; i < nclk_; i++) - printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused); - printf("] tid=%u/%u last_acq=%llu", - tid_, reused_, last_acquire_); + printf("%s%llu", i == 0 ? "" : ",", clk_[i]); + printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_); } SyncClock::SyncClock() { @@ -327,22 +378,14 @@ SyncClock::SyncClock() { SyncClock::~SyncClock() { // Reset must be called before dtor. CHECK_EQ(size_, 0); + CHECK_EQ(blocks_, 0); CHECK_EQ(tab_, 0); CHECK_EQ(tab_idx_, 0); } void SyncClock::Reset(ClockCache *c) { - if (size_ == 0) { - // nothing - } else if (size_ <= ClockBlock::kClockCount) { - // One-level table. - ctx->clock_alloc.Free(c, tab_idx_); - } else { - // Two-level table. 
- for (uptr i = 0; i < size_; i += ClockBlock::kClockCount) - ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]); - ctx->clock_alloc.Free(c, tab_idx_); - } + if (size_) + UnrefClockBlock(c, tab_idx_, blocks_); ResetImpl(); } @@ -350,66 +393,171 @@ void SyncClock::ResetImpl() { tab_ = 0; tab_idx_ = 0; size_ = 0; + blocks_ = 0; release_store_tid_ = kInvalidTid; release_store_reused_ = 0; for (uptr i = 0; i < kDirtyTids; i++) - dirty_tids_[i] = kInvalidTid; + dirty_[i].tid = kInvalidTid; } void SyncClock::Resize(ClockCache *c, uptr nclk) { CPP_STAT_INC(StatClockReleaseResize); - if (RoundUpTo(nclk, ClockBlock::kClockCount) <= - RoundUpTo(size_, ClockBlock::kClockCount)) { - // Growing within the same block. + Unshare(c); + if (nclk <= capacity()) { // Memory is already allocated, just increase the size. size_ = nclk; return; } - if (nclk <= ClockBlock::kClockCount) { + if (size_ == 0) { // Grow from 0 to one-level table. CHECK_EQ(size_, 0); + CHECK_EQ(blocks_, 0); CHECK_EQ(tab_, 0); CHECK_EQ(tab_idx_, 0); - size_ = nclk; tab_idx_ = ctx->clock_alloc.Alloc(c); tab_ = ctx->clock_alloc.Map(tab_idx_); internal_memset(tab_, 0, sizeof(*tab_)); - return; + atomic_store_relaxed(ref_ptr(tab_), 1); + size_ = 1; + } else if (size_ > blocks_ * ClockBlock::kClockCount) { + u32 idx = ctx->clock_alloc.Alloc(c); + ClockBlock *new_cb = ctx->clock_alloc.Map(idx); + uptr top = size_ - blocks_ * ClockBlock::kClockCount; + CHECK_LT(top, ClockBlock::kClockCount); + const uptr move = top * sizeof(tab_->clock[0]); + internal_memcpy(&new_cb->clock[0], tab_->clock, move); + internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move); + internal_memset(tab_->clock, 0, move); + append_block(idx); } - // Growing two-level table. - if (size_ == 0) { - // Allocate first level table. 
- tab_idx_ = ctx->clock_alloc.Alloc(c); - tab_ = ctx->clock_alloc.Map(tab_idx_); - internal_memset(tab_, 0, sizeof(*tab_)); - } else if (size_ <= ClockBlock::kClockCount) { - // Transform one-level table to two-level table. - u32 old = tab_idx_; - tab_idx_ = ctx->clock_alloc.Alloc(c); - tab_ = ctx->clock_alloc.Map(tab_idx_); - internal_memset(tab_, 0, sizeof(*tab_)); - tab_->table[0] = old; - } - // At this point we have first level table allocated. + // At this point we have first level table allocated and all clock elements + // are evacuated from it to a second level block. // Add second level tables as necessary. - for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount); - i < nclk; i += ClockBlock::kClockCount) { + while (nclk > capacity()) { u32 idx = ctx->clock_alloc.Alloc(c); ClockBlock *cb = ctx->clock_alloc.Map(idx); internal_memset(cb, 0, sizeof(*cb)); - CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0); - tab_->table[i/ClockBlock::kClockCount] = idx; + append_block(idx); } size_ = nclk; } -ClockElem &SyncClock::elem(unsigned tid) const { +// Flushes all dirty elements into the main clock array. +void SyncClock::FlushDirty() { + for (unsigned i = 0; i < kDirtyTids; i++) { + Dirty *dirty = &dirty_[i]; + if (dirty->tid != kInvalidTid) { + CHECK_LT(dirty->tid, size_); + elem(dirty->tid).epoch = dirty->epoch; + dirty->tid = kInvalidTid; + } + } +} + +bool SyncClock::IsShared() const { + if (size_ == 0) + return false; + atomic_uint32_t *ref = ref_ptr(tab_); + u32 v = atomic_load(ref, memory_order_acquire); + CHECK_GT(v, 0); + return v > 1; +} + +// Unshares the current clock if it's shared. +// Shared clocks are immutable, so they need to be unshared before any updates. +// Note: this does not apply to dirty entries as they are not shared. +void SyncClock::Unshare(ClockCache *c) { + if (!IsShared()) + return; + // First, copy current state into old. 
+ SyncClock old; + old.tab_ = tab_; + old.tab_idx_ = tab_idx_; + old.size_ = size_; + old.blocks_ = blocks_; + old.release_store_tid_ = release_store_tid_; + old.release_store_reused_ = release_store_reused_; + for (unsigned i = 0; i < kDirtyTids; i++) + old.dirty_[i] = dirty_[i]; + // Then, clear current object. + ResetImpl(); + // Allocate brand new clock in the current object. + Resize(c, old.size_); + // Now copy state back into this object. + Iter old_iter(&old); + for (ClockElem &ce : *this) { + ce = *old_iter; + ++old_iter; + } + release_store_tid_ = old.release_store_tid_; + release_store_reused_ = old.release_store_reused_; + for (unsigned i = 0; i < kDirtyTids; i++) + dirty_[i] = old.dirty_[i]; + // Drop reference to old and delete if necessary. + old.Reset(c); +} + +// Can we cache this clock for future release operations? +ALWAYS_INLINE bool SyncClock::Cachable() const { + if (size_ == 0) + return false; + for (unsigned i = 0; i < kDirtyTids; i++) { + if (dirty_[i].tid != kInvalidTid) + return false; + } + return atomic_load_relaxed(ref_ptr(tab_)) == 1; +} + +// elem linearizes the two-level structure into linear array. +// Note: this is used only for one time accesses, vector operations use +// the iterator as it is much faster. +ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const { DCHECK_LT(tid, size_); - if (size_ <= ClockBlock::kClockCount) + const uptr block = tid / ClockBlock::kClockCount; + DCHECK_LE(block, blocks_); + tid %= ClockBlock::kClockCount; + if (block == blocks_) return tab_->clock[tid]; - u32 idx = tab_->table[tid / ClockBlock::kClockCount]; + u32 idx = get_block(block); ClockBlock *cb = ctx->clock_alloc.Map(idx); - return cb->clock[tid % ClockBlock::kClockCount]; + return cb->clock[tid]; +} + +ALWAYS_INLINE uptr SyncClock::capacity() const { + if (size_ == 0) + return 0; + uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]); + // How many clock elements we can fit into the first level block. 
+ // +1 for ref counter. + uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio; + return blocks_ * ClockBlock::kClockCount + top; +} + +ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const { + DCHECK(size_); + DCHECK_LT(bi, blocks_); + return tab_->table[ClockBlock::kBlockIdx - bi]; +} + +ALWAYS_INLINE void SyncClock::append_block(u32 idx) { + uptr bi = blocks_++; + CHECK_EQ(get_block(bi), 0); + tab_->table[ClockBlock::kBlockIdx - bi] = idx; +} + +// Used only by tests. +u64 SyncClock::get(unsigned tid) const { + for (unsigned i = 0; i < kDirtyTids; i++) { + Dirty dirty = dirty_[i]; + if (dirty.tid == tid) + return dirty.epoch; + } + return elem(tid).epoch; +} + +// Used only by Iter test. +u64 SyncClock::get_clean(unsigned tid) const { + return elem(tid).epoch; } void SyncClock::DebugDump(int(*printf)(const char *s, ...)) { @@ -419,8 +567,32 @@ void SyncClock::DebugDump(int(*printf)(const char *s, ...)) { printf("] reused=["); for (uptr i = 0; i < size_; i++) printf("%s%llu", i == 0 ? "" : ",", elem(i).reused); - printf("] release_store_tid=%d/%d dirty_tids=%d/%d", + printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]", release_store_tid_, release_store_reused_, - dirty_tids_[0], dirty_tids_[1]); + dirty_[0].tid, dirty_[0].epoch, + dirty_[1].tid, dirty_[1].epoch); +} + +void SyncClock::Iter::Next() { + // Finished with the current block, move on to the next one. + block_++; + if (block_ < parent_->blocks_) { + // Iterate over the next second level block. + u32 idx = parent_->get_block(block_); + ClockBlock *cb = ctx->clock_alloc.Map(idx); + pos_ = &cb->clock[0]; + end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, + ClockBlock::kClockCount); + return; + } + if (block_ == parent_->blocks_ && + parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) { + // Iterate over elements in the first level block. 
+ pos_ = &parent_->tab_->clock[0]; + end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, + ClockBlock::kClockCount); + return; + } + parent_ = nullptr; // denotes end } } // namespace __tsan diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.h b/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.h index 378b550fd11b..a891d7bbd889 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.h +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_clock.h @@ -18,25 +18,6 @@ namespace __tsan { -struct ClockElem { - u64 epoch : kClkBits; - u64 reused : 64 - kClkBits; -}; - -struct ClockBlock { - static const uptr kSize = 512; - static const uptr kTableSize = kSize / sizeof(u32); - static const uptr kClockCount = kSize / sizeof(ClockElem); - - union { - u32 table[kTableSize]; - ClockElem clock[kClockCount]; - }; - - ClockBlock() { - } -}; - typedef DenseSlabAlloc ClockAlloc; typedef DenseSlabAllocCache ClockCache; @@ -46,69 +27,117 @@ class SyncClock { SyncClock(); ~SyncClock(); - uptr size() const { - return size_; - } + uptr size() const; - u64 get(unsigned tid) const { - return elem(tid).epoch; - } + // These are used only in tests. + u64 get(unsigned tid) const; + u64 get_clean(unsigned tid) const; void Resize(ClockCache *c, uptr nclk); void Reset(ClockCache *c); void DebugDump(int(*printf)(const char *s, ...)); + // Clock element iterator. + // Note: it iterates only over the table without regard to dirty entries. + class Iter { + public: + explicit Iter(SyncClock* parent); + Iter& operator++(); + bool operator!=(const Iter& other); + ClockElem &operator*(); + + private: + SyncClock *parent_; + // [pos_, end_) is the current continuous range of clock elements. + ClockElem *pos_; + ClockElem *end_; + int block_; // Current number of second level block. 
+ + NOINLINE void Next(); + }; + + Iter begin(); + Iter end(); + private: - friend struct ThreadClock; + friend class ThreadClock; + friend class Iter; static const uptr kDirtyTids = 2; + struct Dirty { + u64 epoch : kClkBits; + u64 tid : 64 - kClkBits; // kInvalidId if not active + }; + unsigned release_store_tid_; unsigned release_store_reused_; - unsigned dirty_tids_[kDirtyTids]; - // tab_ contains indirect pointer to a 512b block using DenseSlabAlloc. - // If size_ <= 64, then tab_ points to an array with 64 ClockElem's. - // Otherwise, tab_ points to an array with 128 u32 elements, + Dirty dirty_[kDirtyTids]; + // If size_ is 0, tab_ is nullptr. + // If size <= 64 (kClockCount), tab_ contains pointer to an array with + // 64 ClockElem's (ClockBlock::clock). + // Otherwise, tab_ points to an array with up to 127 u32 elements, // each pointing to the second-level 512b block with 64 ClockElem's. + // Unused space in the first level ClockBlock is used to store additional + // clock elements. + // The last u32 element in the first level ClockBlock is always used as + // reference counter. + // + // See the following scheme for details. + // All memory blocks are 512 bytes (allocated from ClockAlloc). + // Clock (clk) elements are 64 bits. + // Idx and ref are 32 bits. + // + // tab_ + // | + // \/ + // +----------------------------------------------------+ + // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref | + // +----------------------------------------------------+ + // | | + // | \/ + // | +----------------+ + // | | clk0 ... clk63 | + // | +----------------+ + // \/ + // +------------------+ + // | clk64 ... clk127 | + // +------------------+ + // + // Note: dirty entries, if active, always override what's stored in the clock. ClockBlock *tab_; u32 tab_idx_; - u32 size_; + u16 size_; + u16 blocks_; // Number of second level blocks. 
+ void Unshare(ClockCache *c); + bool IsShared() const; + bool Cachable() const; void ResetImpl(); + void FlushDirty(); + uptr capacity() const; + u32 get_block(uptr bi) const; + void append_block(u32 idx); ClockElem &elem(unsigned tid) const; }; // The clock that lives in threads. -struct ThreadClock { +class ThreadClock { public: typedef DenseSlabAllocCache Cache; explicit ThreadClock(unsigned tid, unsigned reused = 0); - u64 get(unsigned tid) const { - DCHECK_LT(tid, kMaxTidInClock); - return clk_[tid].epoch; - } - + u64 get(unsigned tid) const; void set(ClockCache *c, unsigned tid, u64 v); + void set(u64 v); + void tick(); + uptr size() const; - void set(u64 v) { - DCHECK_GE(v, clk_[tid_].epoch); - clk_[tid_].epoch = v; - } - - void tick() { - clk_[tid_].epoch++; - } - - uptr size() const { - return nclk_; - } - - void acquire(ClockCache *c, const SyncClock *src); - void release(ClockCache *c, SyncClock *dst) const; + void acquire(ClockCache *c, SyncClock *src); + void release(ClockCache *c, SyncClock *dst); void acq_rel(ClockCache *c, SyncClock *dst); - void ReleaseStore(ClockCache *c, SyncClock *dst) const; + void ReleaseStore(ClockCache *c, SyncClock *dst); void ResetCached(ClockCache *c); void DebugReset(); @@ -116,16 +145,82 @@ struct ThreadClock { private: static const uptr kDirtyTids = SyncClock::kDirtyTids; + // Index of the thread associated with he clock ("current thread"). const unsigned tid_; - const unsigned reused_; + const unsigned reused_; // tid_ reuse count. + // Current thread time when it acquired something from other threads. u64 last_acquire_; + + // Cached SyncClock (without dirty entries and release_store_tid_). + // We reuse it for subsequent store-release operations without intervening + // acquire operations. Since it is shared (and thus constant), clock value + // for the current thread is then stored in dirty entries in the SyncClock. + // We host a refernece to the table while it is cached here. 
+ u32 cached_idx_; + u16 cached_size_; + u16 cached_blocks_; + + // Number of active elements in the clk_ table (the rest is zeros). uptr nclk_; - ClockElem clk_[kMaxTidInClock]; + u64 clk_[kMaxTidInClock]; // Fixed size vector clock. bool IsAlreadyAcquired(const SyncClock *src) const; - void UpdateCurrentThread(SyncClock *dst) const; + void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const; }; +ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const { + DCHECK_LT(tid, kMaxTidInClock); + return clk_[tid]; +} + +ALWAYS_INLINE void ThreadClock::set(u64 v) { + DCHECK_GE(v, clk_[tid_]); + clk_[tid_] = v; +} + +ALWAYS_INLINE void ThreadClock::tick() { + clk_[tid_]++; +} + +ALWAYS_INLINE uptr ThreadClock::size() const { + return nclk_; +} + +ALWAYS_INLINE SyncClock::Iter SyncClock::begin() { + return Iter(this); +} + +ALWAYS_INLINE SyncClock::Iter SyncClock::end() { + return Iter(nullptr); +} + +ALWAYS_INLINE uptr SyncClock::size() const { + return size_; +} + +ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent) + : parent_(parent) + , pos_(nullptr) + , end_(nullptr) + , block_(-1) { + if (parent) + Next(); +} + +ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() { + pos_++; + if (UNLIKELY(pos_ >= end_)) + Next(); + return *this; +} + +ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) { + return parent_ != other.parent_; +} + +ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() { + return *pos_; +} } // namespace __tsan #endif // TSAN_CLOCK_H diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_defs.h b/contrib/compiler-rt/lib/tsan/rtl/tsan_defs.h index 8977fea7c552..3c775debfb09 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_defs.h +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_defs.h @@ -38,15 +38,40 @@ namespace __tsan { +const int kClkBits = 42; +const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; + +struct ClockElem { + u64 epoch : kClkBits; + u64 reused : 64 - kClkBits; // tid reuse count +}; + +struct ClockBlock 
{ + static const uptr kSize = 512; + static const uptr kTableSize = kSize / sizeof(u32); + static const uptr kClockCount = kSize / sizeof(ClockElem); + static const uptr kRefIdx = kTableSize - 1; + static const uptr kBlockIdx = kTableSize - 2; + + union { + u32 table[kTableSize]; + ClockElem clock[kClockCount]; + }; + + ClockBlock() { + } +}; + const int kTidBits = 13; -const unsigned kMaxTid = 1 << kTidBits; +// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is +// occupied by reference counter, so total number of elements we can store +// in SyncClock is kClockCount * (kTableSize - 1). +const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount; #if !SANITIZER_GO const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit. #else const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory. #endif -const int kClkBits = 42; -const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; const uptr kShadowStackSize = 64 * 1024; // Count of shadow values in a shadow cell. @@ -74,7 +99,7 @@ const bool kCollectHistory = false; const bool kCollectHistory = true; #endif -const unsigned kInvalidTid = (unsigned)-1; +const u16 kInvalidTid = kMaxTid + 1; // The following "build consistency" machinery ensures that all source files // are built in the same configuration. Inconsistent builds lead to diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_mman.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_mman.cc index 1434cf688ce9..f79dccddba9f 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_mman.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_mman.cc @@ -10,6 +10,7 @@ // This file is a part of ThreadSanitizer (TSan), a race detector. 
// //===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_allocator_checks.h" #include "sanitizer_common/sanitizer_allocator_interface.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_placement_new.h" diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index 0ba01babe69a..ead1e5704989 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -286,7 +286,7 @@ void InitializePlatform() { int ExtractResolvFDs(void *state, int *fds, int nfd) { #if SANITIZER_LINUX && !SANITIZER_ANDROID int cnt = 0; - __res_state *statp = (__res_state*)state; + struct __res_state *statp = (struct __res_state*)state; for (int i = 0; i < MAXNS && cnt < nfd; i++) { if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1) fds[cnt++] = statp->_u._ext.nssocks[i]; diff --git a/contrib/compiler-rt/lib/ubsan/ubsan_handlers.cc b/contrib/compiler-rt/lib/ubsan/ubsan_handlers.cc index 185752719aff..75a4490a1843 100644 --- a/contrib/compiler-rt/lib/ubsan/ubsan_handlers.cc +++ b/contrib/compiler-rt/lib/ubsan/ubsan_handlers.cc @@ -573,14 +573,19 @@ static void handlePointerOverflowImpl(PointerOverflowData *Data, ScopedReport R(Opts, Loc, ET); - if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) - Diag(Loc, DL_Error, "unsigned pointer index expression result is %0, " - "preceding its base %1") - << (void *)Result << (void *)Base; - else + if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) { + if (Base > Result) + Diag(Loc, DL_Error, "addition of unsigned offset to %0 overflowed to %1") + << (void *)Base << (void *)Result; + else + Diag(Loc, DL_Error, + "subtraction of unsigned offset from %0 overflowed to %1") + << (void *)Base << (void *)Result; + } else { Diag(Loc, DL_Error, "pointer index expression with base %0 overflowed to %1") << (void *)Base << 
(void *)Result; + } } void __ubsan::__ubsan_handle_pointer_overflow(PointerOverflowData *Data, diff --git a/contrib/libc++/include/__config b/contrib/libc++/include/__config index 003e1ea60c60..f15d2d06e564 100644 --- a/contrib/libc++/include/__config +++ b/contrib/libc++/include/__config @@ -229,8 +229,9 @@ # define _LIBCPP_SHORT_WCHAR 1 // Both MinGW and native MSVC provide a "MSVC"-like enviroment # define _LIBCPP_MSVCRT_LIKE -// If mingw not explicitly detected, assume using MS C runtime only. -# ifndef __MINGW32__ +// If mingw not explicitly detected, assume using MS C runtime only if +// a MS compatibility version is specified. +# if defined(_MSC_VER) && !defined(__MINGW32__) # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) @@ -625,7 +626,6 @@ namespace std { #define _LIBCPP_HIDDEN #define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS #define _LIBCPP_TEMPLATE_VIS -#define _LIBCPP_FUNC_VIS_ONLY #define _LIBCPP_ENUM_VIS #if defined(_LIBCPP_COMPILER_MSVC) @@ -684,10 +684,6 @@ namespace std { # endif #endif -#ifndef _LIBCPP_FUNC_VIS_ONLY -# define _LIBCPP_FUNC_VIS_ONLY _LIBCPP_FUNC_VIS -#endif - #ifndef _LIBCPP_EXTERN_VIS # define _LIBCPP_EXTERN_VIS #endif @@ -925,8 +921,10 @@ template struct __static_assert_check {}; # define _LIBCPP_STD_VER 11 # elif __cplusplus <= 201402L # define _LIBCPP_STD_VER 14 +# elif __cplusplus <= 201703L +# define _LIBCPP_STD_VER 17 # else -# define _LIBCPP_STD_VER 16 // current year, or date of c++17 ratification +# define _LIBCPP_STD_VER 18 // current year, or date of c++2a ratification # endif #endif // _LIBCPP_STD_VER diff --git a/contrib/libc++/include/algorithm b/contrib/libc++/include/algorithm index 9fe0361e577d..4542275adfda 100644 --- a/contrib/libc++/include/algorithm +++ b/contrib/libc++/include/algorithm @@ -4234,10 +4234,6 @@ sort(__wrap_iter<_Tp*> __first, __wrap_iter<_Tp*> __last, _Compare __comp) 
_VSTD::sort<_Tp*, _Comp_ref>(__first.base(), __last.base(), __comp); } -#ifdef _LIBCPP_MSVC -#pragma warning( push ) -#pragma warning( disable: 4231) -#endif // _LIBCPP_MSVC _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, char*>(char*, char*, __less&)) _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&)) _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, signed char*>(signed char*, signed char*, __less&)) @@ -4271,9 +4267,6 @@ _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long double*>(long double*, long double*, __less&)) _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS unsigned __sort5<__less&, long double*>(long double*, long double*, long double*, long double*, long double*, __less&)) -#ifdef _LIBCPP_MSVC -#pragma warning( pop ) -#endif // _LIBCPP_MSVC // lower_bound diff --git a/contrib/libc++/include/string b/contrib/libc++/include/string index 010a4c7816ea..cf42f529c701 100644 --- a/contrib/libc++/include/string +++ b/contrib/libc++/include/string @@ -578,14 +578,7 @@ __basic_string_common<__b>::__throw_out_of_range() const _VSTD::__throw_out_of_range("basic_string"); } -#ifdef _LIBCPP_MSVC -#pragma warning( push ) -#pragma warning( disable: 4231 ) -#endif // _LIBCPP_MSVC _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __basic_string_common) -#ifdef _LIBCPP_MSVC -#pragma warning( pop ) -#endif // _LIBCPP_MSVC #ifdef _LIBCPP_NO_EXCEPTIONS template @@ -4006,7 +3999,7 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) -_LIBCPP_EXTERN_TEMPLATE(string operator+, allocator >(char const*, string const&)) +_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+, allocator >(char 
const*, string const&)) #if _LIBCPP_STD_VER > 11 // Literal suffixes for basic_string [basic.string.literals] diff --git a/contrib/libc++/include/vector b/contrib/libc++/include/vector index ee19fb7081a2..6e9920a0f80f 100644 --- a/contrib/libc++/include/vector +++ b/contrib/libc++/include/vector @@ -310,14 +310,7 @@ __vector_base_common<__b>::__throw_out_of_range() const _VSTD::__throw_out_of_range("vector"); } -#ifdef _LIBCPP_MSVC -#pragma warning( push ) -#pragma warning( disable: 4231 ) -#endif // _LIBCPP_MSVC _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __vector_base_common) -#ifdef _LIBCPP_MSVC -#pragma warning( pop ) -#endif // _LIBCPP_MSVC template class __vector_base diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h index 8cae63c3c869..b566aeaf1fd6 100644 --- a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -29,9 +29,9 @@ namespace llvm { /// DominanceFrontierBase - Common base class for computing forward and inverse /// dominance frontiers for a function. /// -template +template class DominanceFrontierBase { -public: + public: typedef std::set DomSetType; // Dom set for a bb typedef std::map DomSetMapType; // Dom set map @@ -40,10 +40,10 @@ class DominanceFrontierBase { DomSetMapType Frontiers; std::vector Roots; - const bool IsPostDominators; + static constexpr bool IsPostDominators = IsPostDom; -public: - DominanceFrontierBase(bool isPostDom) : IsPostDominators(isPostDom) {} + public: + DominanceFrontierBase() {} /// getRoots - Return the root blocks of the current CFG. This may include /// multiple blocks if we are computing post dominators. For forward @@ -96,7 +96,7 @@ class DominanceFrontierBase { /// compare - Return true if the other dominance frontier base matches /// this dominance frontier base. Otherwise return false. 
- bool compare(DominanceFrontierBase &Other) const; + bool compare(DominanceFrontierBase &Other) const; /// print - Convert to human readable form /// @@ -113,22 +113,21 @@ class DominanceFrontierBase { /// used to compute a forward dominator frontiers. /// template -class ForwardDominanceFrontierBase : public DominanceFrontierBase { -private: +class ForwardDominanceFrontierBase + : public DominanceFrontierBase { + private: typedef GraphTraits BlockTraits; public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef typename DominanceFrontierBase::DomSetType DomSetType; + typedef DomTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef typename DominanceFrontierBase::DomSetType DomSetType; - ForwardDominanceFrontierBase() : DominanceFrontierBase(false) {} - - void analyze(DomTreeT &DT) { - this->Roots = DT.getRoots(); - assert(this->Roots.size() == 1 && - "Only one entry block for forward domfronts!"); - calculate(DT, DT[this->Roots[0]]); + void analyze(DomTreeT &DT) { + this->Roots = DT.getRoots(); + assert(this->Roots.size() == 1 && + "Only one entry block for forward domfronts!"); + calculate(DT, DT[this->Roots[0]]); } const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node); @@ -136,15 +135,16 @@ class ForwardDominanceFrontierBase : public DominanceFrontierBase { class DominanceFrontier : public ForwardDominanceFrontierBase { public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef DominanceFrontierBase::DomSetType DomSetType; - typedef DominanceFrontierBase::iterator iterator; - typedef DominanceFrontierBase::const_iterator const_iterator; + typedef DomTreeBase DomTreeT; + typedef DomTreeNodeBase DomTreeNodeT; + typedef DominanceFrontierBase::DomSetType DomSetType; + typedef DominanceFrontierBase::iterator iterator; + typedef DominanceFrontierBase::const_iterator + const_iterator; - /// Handle invalidation explicitly. 
- bool invalidate(Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &); + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); }; class DominanceFrontierWrapperPass : public FunctionPass { @@ -168,7 +168,8 @@ class DominanceFrontierWrapperPass : public FunctionPass { void dump() const; }; -extern template class DominanceFrontierBase; +extern template class DominanceFrontierBase; +extern template class DominanceFrontierBase; extern template class ForwardDominanceFrontierBase; /// \brief Analysis pass which computes a \c DominanceFrontier. diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h index 9f8cacc24f2c..5093b975e709 100644 --- a/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontierImpl.h @@ -39,33 +39,33 @@ class DFCalculateWorkObject { const DomTreeNodeT *parentNode; }; -template -void DominanceFrontierBase::removeBlock(BlockT *BB) { +template +void DominanceFrontierBase::removeBlock(BlockT *BB) { assert(find(BB) != end() && "Block is not in DominanceFrontier!"); for (iterator I = begin(), E = end(); I != E; ++I) I->second.erase(BB); Frontiers.erase(BB); } -template -void DominanceFrontierBase::addToFrontier(iterator I, - BlockT *Node) { +template +void DominanceFrontierBase::addToFrontier(iterator I, + BlockT *Node) { assert(I != end() && "BB is not in DominanceFrontier!"); assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); I->second.erase(Node); } -template -void DominanceFrontierBase::removeFromFrontier(iterator I, - BlockT *Node) { +template +void DominanceFrontierBase::removeFromFrontier( + iterator I, BlockT *Node) { assert(I != end() && "BB is not in DominanceFrontier!"); assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); I->second.erase(Node); } 
-template -bool DominanceFrontierBase::compareDomSet(DomSetType &DS1, - const DomSetType &DS2) const { +template +bool DominanceFrontierBase::compareDomSet( + DomSetType &DS1, const DomSetType &DS2) const { std::set tmpSet; for (BlockT *BB : DS2) tmpSet.insert(BB); @@ -88,9 +88,9 @@ bool DominanceFrontierBase::compareDomSet(DomSetType &DS1, return false; } -template -bool DominanceFrontierBase::compare( - DominanceFrontierBase &Other) const { +template +bool DominanceFrontierBase::compare( + DominanceFrontierBase &Other) const { DomSetMapType tmpFrontiers; for (typename DomSetMapType::const_iterator I = Other.begin(), E = Other.end(); @@ -118,8 +118,8 @@ bool DominanceFrontierBase::compare( return false; } -template -void DominanceFrontierBase::print(raw_ostream &OS) const { +template +void DominanceFrontierBase::print(raw_ostream &OS) const { for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << " DomFrontier for BB "; if (I->first) @@ -142,8 +142,8 @@ void DominanceFrontierBase::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -template -void DominanceFrontierBase::dump() const { +template +void DominanceFrontierBase::dump() const { print(dbgs()); } #endif diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h index bd74d6bd14c3..edaf4e9025bc 100644 --- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -42,11 +42,11 @@ namespace llvm { /// By default, liveness is not used to prune the IDF computation. /// The template parameters should be either BasicBlock* or Inverse, depending on if you want the forward or reverse IDF. 
-template +template class IDFCalculator { - -public: - IDFCalculator(DominatorTreeBase &DT) : DT(DT), useLiveIn(false) {} + public: + IDFCalculator(DominatorTreeBase &DT) + : DT(DT), useLiveIn(false) {} /// \brief Give the IDF calculator the set of blocks in which the value is /// defined. This is equivalent to the set of starting blocks it should be @@ -84,12 +84,12 @@ class IDFCalculator { void calculate(SmallVectorImpl &IDFBlocks); private: - DominatorTreeBase &DT; - bool useLiveIn; - const SmallPtrSetImpl *LiveInBlocks; - const SmallPtrSetImpl *DefBlocks; + DominatorTreeBase &DT; + bool useLiveIn; + const SmallPtrSetImpl *LiveInBlocks; + const SmallPtrSetImpl *DefBlocks; }; -typedef IDFCalculator ForwardIDFCalculator; -typedef IDFCalculator> ReverseIDFCalculator; +typedef IDFCalculator ForwardIDFCalculator; +typedef IDFCalculator, true> ReverseIDFCalculator; } #endif diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h index 3a052761ad7d..a025f2275fb4 100644 --- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -43,6 +43,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -908,7 +909,7 @@ class LazyCallGraph { /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are /// requested during traversal. - LazyCallGraph(Module &M); + LazyCallGraph(Module &M, TargetLibraryInfo &TLI); LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); @@ -966,6 +967,22 @@ class LazyCallGraph { return insertInto(F, N); } + /// Get the sequence of known and defined library functions. 
+ /// + /// These functions, because they are known to LLVM, can have calls + /// introduced out of thin air from arbitrary IR. + ArrayRef getLibFunctions() const { + return LibFunctions.getArrayRef(); + } + + /// Test whether a function is a known and defined library function tracked by + /// the call graph. + /// + /// Because these functions are known to LLVM they are specially modeled in + /// the call graph and even when all IR-level references have been removed + /// remain active and reachable. + bool isLibFunction(Function &F) const { return LibFunctions.count(&F); } + ///@{ /// \name Pre-SCC Mutation API /// @@ -1100,6 +1117,11 @@ class LazyCallGraph { /// These are all of the RefSCCs which have no children. SmallVector LeafRefSCCs; + /// Defined functions that are also known library functions which the + /// optimizer can reason about and therefore might introduce calls to out of + /// thin air. + SmallSetVector LibFunctions; + /// Helper to insert a new function, with an already looked-up entry in /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); @@ -1216,8 +1238,8 @@ class LazyCallGraphAnalysis : public AnalysisInfoMixin { /// /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. 
- LazyCallGraph run(Module &M, ModuleAnalysisManager &) { - return LazyCallGraph(M); + LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) { + return LazyCallGraph(M, AM.getResult(M)); } }; diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h index 096df1e421a7..70ce9a870517 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h @@ -56,7 +56,8 @@ class Loop; class MDNode; class PHINode; class raw_ostream; -template class DominatorTreeBase; +template +class DominatorTreeBase; template class LoopInfoBase; template class LoopBase; @@ -663,12 +664,12 @@ class LoopInfoBase { } /// Create the loop forest using a stable algorithm. - void analyze(const DominatorTreeBase &DomTree); + void analyze(const DominatorTreeBase &DomTree); // Debugging void print(raw_ostream &OS) const; - void verify(const DominatorTreeBase &DomTree) const; + void verify(const DominatorTreeBase &DomTree) const; }; // Implementation in LoopInfoImpl.h @@ -683,7 +684,7 @@ class LoopInfo : public LoopInfoBase { LoopInfo(const LoopInfo &) = delete; public: LoopInfo() {} - explicit LoopInfo(const DominatorTreeBase &DomTree); + explicit LoopInfo(const DominatorTreeBase &DomTree); LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast(Arg))) {} LoopInfo &operator=(LoopInfo &&RHS) { diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h index 372fc8b21745..e9177e68ed77 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -340,10 +340,10 @@ void LoopBase::print(raw_ostream &OS, unsigned Depth, /// Discover a subloop with the specified backedges such that: All blocks within /// this loop are mapped to this loop or a subloop. And all subloops within this /// loop have their parent loop set to this loop or a subloop. 
-template -static void discoverAndMapSubloop(LoopT *L, ArrayRef Backedges, - LoopInfoBase *LI, - const DominatorTreeBase &DomTree) { +template +static void discoverAndMapSubloop( + LoopT *L, ArrayRef Backedges, LoopInfoBase *LI, + const DomTreeBase &DomTree) { typedef GraphTraits > InvBlockTraits; unsigned NumBlocks = 0; @@ -462,10 +462,9 @@ void PopulateLoopsDFS::insertIntoLoop(BlockT *Block) { /// /// The Block vectors are inclusive, so step 3 requires loop-depth number of /// insertions per block. -template -void LoopInfoBase:: -analyze(const DominatorTreeBase &DomTree) { - +template +void LoopInfoBase::analyze( + const DomTreeBase &DomTree) { // Postorder traversal of the dominator tree. const DomTreeNodeBase *DomRoot = DomTree.getRootNode(); for (auto DomNode : post_order(DomRoot)) { @@ -607,7 +606,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, template void LoopInfoBase::verify( - const DominatorTreeBase &DomTree) const { + const DomTreeBase &DomTree) const { DenseSet Loops; for (iterator I = begin(), E = end(); I != E; ++I) { assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); diff --git a/contrib/llvm/include/llvm/Analysis/PostDominators.h b/contrib/llvm/include/llvm/Analysis/PostDominators.h index 94ee3b03bb86..17f2e8eaf4a2 100644 --- a/contrib/llvm/include/llvm/Analysis/PostDominators.h +++ b/contrib/llvm/include/llvm/Analysis/PostDominators.h @@ -22,10 +22,8 @@ namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to /// compute the post-dominator tree. /// -struct PostDominatorTree : public DominatorTreeBase { - typedef DominatorTreeBase Base; - - PostDominatorTree() : DominatorTreeBase(true) {} +struct PostDominatorTree : public PostDomTreeBase { + typedef PostDomTreeBase Base; /// Handle invalidation explicitly. 
bool invalidate(Function &F, const PreservedAnalyses &PA, diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h index c7accfae78b0..d1b182755cf8 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -237,17 +237,15 @@ struct FoldingSetTrait : DefaultFoldingSetTrait { }; /// This class represents an assumption that two SCEV expressions are equal, -/// and this can be checked at run-time. We assume that the left hand side is -/// a SCEVUnknown and the right hand side a constant. +/// and this can be checked at run-time. class SCEVEqualPredicate final : public SCEVPredicate { - /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a - /// constant. - const SCEVUnknown *LHS; - const SCEVConstant *RHS; + /// We assume that LHS == RHS. + const SCEV *LHS; + const SCEV *RHS; public: - SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, - const SCEVConstant *RHS); + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS, + const SCEV *RHS); /// Implementation of the SCEVPredicate interface bool implies(const SCEVPredicate *N) const override; @@ -256,10 +254,10 @@ class SCEVEqualPredicate final : public SCEVPredicate { const SCEV *getExpr() const override; /// Returns the left hand side of the equality. - const SCEVUnknown *getLHS() const { return LHS; } + const SCEV *getLHS() const { return LHS; } /// Returns the right hand side of the equality. - const SCEVConstant *getRHS() const { return RHS; } + const SCEV *getRHS() const { return RHS; } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEVPredicate *P) { @@ -1241,6 +1239,14 @@ class ScalarEvolution { SmallVector NewOp(Operands.begin(), Operands.end()); return getAddRecExpr(NewOp, L, Flags); } + + /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some + /// Predicates. 
If successful return these ; + /// The function is intended to be called from PSCEV (the caller will decide + /// whether to actually add the predicates and carry out the rewrites). + Optional>> + createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); + /// Returns an expression for a GEP /// /// \p GEP The GEP. The indices contained in the GEP itself are ignored, @@ -1675,8 +1681,7 @@ class ScalarEvolution { return F.getParent()->getDataLayout(); } - const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS); + const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS); const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, @@ -1692,6 +1697,19 @@ class ScalarEvolution { SmallPtrSetImpl &Preds); private: + /// Similar to createAddRecFromPHI, but with the additional flexibility of + /// suggesting runtime overflow checks in case casts are encountered. + /// If successful, the analysis records that for this loop, \p SymbolicPHI, + /// which is the UnknownSCEV currently representing the PHI, can be rewritten + /// into an AddRec, assuming some predicates; The function then returns the + /// AddRec and the predicates as a pair, and caches this pair in + /// PredicatedSCEVRewrites. + /// If the analysis is not successful, a mapping from the \p SymbolicPHI to + /// itself (with no predicates) is recorded, and a nullptr with an empty + /// predicates vector is returned as a pair. + Optional>> + createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI); + /// Compute the backedge taken count knowing the interval difference, the /// stride and presence of the equality in the comparison. const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, @@ -1722,6 +1740,12 @@ class ScalarEvolution { FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression + /// they can be rewritten into under certain predicates. 
+ DenseMap, + std::pair>> + PredicatedSCEVRewrites; + /// The head of a linked list of all SCEVUnknown values that have been /// allocated. This is used by releaseMemory to locate them all and call /// their destructors. diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h index dfb525e3de7a..24edd3826a2e 100644 --- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -155,6 +155,13 @@ class TargetTransformInfo { int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) const; + /// \brief Estimate the cost of a EXT operation when lowered. + /// + /// The contract for this function is the same as \c getOperationCost except + /// that it supports an interface that provides extra information specific to + /// the EXT operation. + int getExtCost(const Instruction *I, const Value *Src) const; + /// \brief Estimate the cost of a function call when lowered. 
/// /// The contract for this is the same as \c getOperationCost except that it @@ -849,6 +856,7 @@ class TargetTransformInfo::Concept { virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) = 0; + virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; virtual int getCallCost(const Function *F, int NumArgs) = 0; virtual int getCallCost(const Function *F, @@ -1022,6 +1030,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ArrayRef Operands) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } + int getExtCost(const Instruction *I, const Value *Src) override { + return Impl.getExtCost(I, Src); + } int getCallCost(FunctionType *FTy, int NumArgs) override { return Impl.getCallCost(FTy, NumArgs); } diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 8740ee92eed5..0b07fe9aa232 100644 --- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -120,6 +120,10 @@ class TargetTransformInfoImplBase { return SI.getNumCases(); } + int getExtCost(const Instruction *I, const Value *Src) { + return TTI::TCC_Basic; + } + unsigned getCallCost(FunctionType *FTy, int NumArgs) { assert(FTy && "FunctionType must be provided to this routine."); @@ -728,6 +732,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { // nop on most sane targets. 
if (isa(CI->getOperand(0))) return TTI::TCC_Free; + if (isa(CI) || isa(CI) || isa(CI)) + return static_cast(this)->getExtCost(CI, Operands.back()); } return static_cast(this)->getOperationCost( diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b59fd60e8aed..633107024792 100644 --- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -155,6 +155,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getGEPCost(PointeeType, Ptr, Operands); } + int getExtCost(const Instruction *I, const Value *Src) { + if (getTLI()->isExtFree(I)) + return TargetTransformInfo::TCC_Free; + + if (isa(I) || isa(I)) + if (const LoadInst *LI = dyn_cast(Src)) + if (getTLI()->isExtLoad(LI, I, DL)) + return TargetTransformInfo::TCC_Free; + + return TargetTransformInfo::TCC_Basic; + } + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) { return BaseT::getIntrinsicCost(IID, RetTy, Arguments); diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h index 370ffbe4862e..6efeefd9a721 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -23,27 +23,24 @@ class MachineDominanceFrontier : public MachineFunctionPass { ForwardDominanceFrontierBase Base; public: - using DomTreeT = DominatorTreeBase; - using DomTreeNodeT = DomTreeNodeBase; - using DomSetType = DominanceFrontierBase::DomSetType; - using iterator = DominanceFrontierBase::iterator; - using const_iterator = - DominanceFrontierBase::const_iterator; + using DomTreeT = DomTreeBase; + using DomTreeNodeT = DomTreeNodeBase; + using DomSetType = DominanceFrontierBase::DomSetType; + using iterator = DominanceFrontierBase::iterator; + using const_iterator = + DominanceFrontierBase::const_iterator; - 
MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; - MachineDominanceFrontier & - operator=(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier &operator=(const MachineDominanceFrontier &) = delete; - static char ID; + static char ID; - MachineDominanceFrontier(); + MachineDominanceFrontier(); - DominanceFrontierBase &getBase() { - return Base; - } + DominanceFrontierBase &getBase() { return Base; } - inline const std::vector &getRoots() const { - return Base.getRoots(); + inline const std::vector &getRoots() const { + return Base.getRoots(); } MachineBasicBlock *getRoot() const { @@ -98,7 +95,7 @@ class MachineDominanceFrontier : public MachineFunctionPass { return Base.compareDomSet(DS1, DS2); } - bool compare(DominanceFrontierBase &Other) const { + bool compare(DominanceFrontierBase &Other) const { return Base.compare(Other); } diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h index 74a7c3ea04ae..8bf98f606495 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h @@ -28,13 +28,15 @@ namespace llvm { -template<> -inline void DominatorTreeBase::addRoot(MachineBasicBlock* MBB) { +template <> +inline void DominatorTreeBase::addRoot( + MachineBasicBlock *MBB) { this->Roots.push_back(MBB); } extern template class DomTreeNodeBase; -extern template class DominatorTreeBase; +extern template class DominatorTreeBase; // DomTree +extern template class DominatorTreeBase; // PostDomTree using MachineDomTreeNode = DomTreeNodeBase; @@ -65,7 +67,7 @@ class MachineDominatorTree : public MachineFunctionPass { mutable SmallSet NewBBs; /// The DominatorTreeBase that is used to compute a normal dominator tree - std::unique_ptr> DT; + std::unique_ptr> DT; /// \brief Apply all the recorded critical edges to the DT. 
/// This updates the underlying DT information in a way that uses @@ -79,9 +81,8 @@ class MachineDominatorTree : public MachineFunctionPass { MachineDominatorTree(); - DominatorTreeBase &getBase() { - if (!DT) - DT.reset(new DominatorTreeBase(false)); + DomTreeBase &getBase() { + if (!DT) DT.reset(new DomTreeBase()); applySplitCriticalEdges(); return *DT; } diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePostDominators.h b/contrib/llvm/include/llvm/CodeGen/MachinePostDominators.h index 70bdb191ad34..d29d2d85cb0a 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachinePostDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachinePostDominators.h @@ -26,7 +26,7 @@ namespace llvm { /// struct MachinePostDominatorTree : public MachineFunctionPass { private: - DominatorTreeBase *DT; + PostDomTreeBase *DT; public: static char ID; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index 70ccc867cd38..df55e181364c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -17,7 +17,6 @@ namespace llvm { namespace codeview { class TypeCollection; -class TypeServerHandler; class TypeVisitorCallbacks; enum VisitorDataSource { @@ -31,11 +30,9 @@ enum VisitorDataSource { Error visitTypeRecord(CVType &Record, TypeIndex Index, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source = VDS_BytesPresent, - TypeServerHandler *TS = nullptr); + VisitorDataSource Source = VDS_BytesPresent); Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source = VDS_BytesPresent, - TypeServerHandler *TS = nullptr); + VisitorDataSource Source = VDS_BytesPresent); Error visitMemberRecord(CVMemberRecord Record, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source = VDS_BytesPresent); @@ -46,12 +43,9 @@ Error visitMemberRecordStream(ArrayRef FieldList, TypeVisitorCallbacks 
&Callbacks); Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source = VDS_BytesPresent, - TypeServerHandler *TS = nullptr); -Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks, - TypeServerHandler *TS = nullptr); -Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks, - TypeServerHandler *TS = nullptr); + VisitorDataSource Source = VDS_BytesPresent); +Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks); +Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks); } // end namespace codeview } // end namespace llvm diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index db944c7057f7..94f104ff772c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -84,7 +84,7 @@ class CodeViewRecordIO { Error mapEncodedInteger(uint64_t &Value); Error mapEncodedInteger(APSInt &Value); Error mapStringZ(StringRef &Value); - Error mapGuid(StringRef &Guid); + Error mapGuid(GUID &Guid); Error mapStringZVectorZ(std::vector &Value); diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/Formatters.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/Formatters.h index 0842c1e373db..278ad02a39cd 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/Formatters.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/Formatters.h @@ -12,6 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" @@ -31,7 +32,7 @@ class GuidAdapter final : public FormatAdapter> { explicit GuidAdapter(ArrayRef Guid); explicit GuidAdapter(StringRef Guid); - void format(raw_ostream &Stream, StringRef Style) 
override ; + void format(raw_ostream &Stream, StringRef Style) override; }; } // end namespace detail @@ -60,6 +61,13 @@ template <> struct format_provider { } }; +template <> struct format_provider { + static void format(const codeview::GUID &V, llvm::raw_ostream &Stream, + StringRef Style) { + Stream << V; + } +}; + } // end namespace llvm #endif // LLVM_DEBUGINFO_CODEVIEW_FORMATTERS_H diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/GUID.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/GUID.h new file mode 100644 index 000000000000..a055ce9e2e45 --- /dev/null +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/GUID.h @@ -0,0 +1,55 @@ +//===- GUID.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_GUID_H +#define LLVM_DEBUGINFO_CODEVIEW_GUID_H + +#include +#include + +namespace llvm { +class raw_ostream; + +namespace codeview { + +/// This represents the 'GUID' type from windows.h. 
+struct GUID { + uint8_t Guid[16]; +}; + +inline bool operator==(const GUID &LHS, const GUID &RHS) { + return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)); +} + +inline bool operator<(const GUID &LHS, const GUID &RHS) { + return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) < 0; +} + +inline bool operator<=(const GUID &LHS, const GUID &RHS) { + return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) <= 0; +} + +inline bool operator>(const GUID &LHS, const GUID &RHS) { + return !(LHS <= RHS); +} + +inline bool operator>=(const GUID &LHS, const GUID &RHS) { + return !(LHS < RHS); +} + +inline bool operator!=(const GUID &LHS, const GUID &RHS) { + return !(LHS == RHS); +} + +raw_ostream &operator<<(raw_ostream &OS, const GUID &Guid); + +} // namespace codeview +} // namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index cdfc1745cea5..f3086cf3dbb9 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -848,7 +848,7 @@ class BuildInfoSym : public SymbolRecord { : SymbolRecord(SymbolRecordKind::BuildInfoSym), RecordOffset(RecordOffset) {} - uint32_t BuildId; + TypeIndex BuildId; uint32_t RecordOffset; }; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h index 2efeb1b3cefd..7942c0c0bc21 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -18,6 +18,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Endian.h" @@ -539,15 +540,17 @@ class TypeServer2Record : public TypeRecord { 
public: TypeServer2Record() = default; explicit TypeServer2Record(TypeRecordKind Kind) : TypeRecord(Kind) {} - TypeServer2Record(StringRef Guid, uint32_t Age, StringRef Name) - : TypeRecord(TypeRecordKind::TypeServer2), Guid(Guid), Age(Age), - Name(Name) {} + TypeServer2Record(StringRef GuidStr, uint32_t Age, StringRef Name) + : TypeRecord(TypeRecordKind::TypeServer2), Age(Age), Name(Name) { + assert(GuidStr.size() == 16 && "guid isn't 16 bytes"); + ::memcpy(Guid.Guid, GuidStr.data(), 16); + } - StringRef getGuid() const { return Guid; } + const GUID &getGuid() const { return Guid; } uint32_t getAge() const { return Age; } StringRef getName() const { return Name; } - StringRef Guid; + GUID Guid; uint32_t Age; StringRef Name; }; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeServerHandler.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeServerHandler.h deleted file mode 100644 index e96baad9ceae..000000000000 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeServerHandler.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- TypeServerHandler.h --------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H -#define LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H - -#include "llvm/Support/Error.h" - -namespace llvm { -namespace codeview { - -class TypeServer2Record; -class TypeVisitorCallbacks; - -class TypeServerHandler { -public: - virtual ~TypeServerHandler() = default; - - /// Handle a TypeServer record. If the implementation returns true - /// the record will not be processed by the top-level visitor. If - /// it returns false, it will be processed. If it returns an Error, - /// then the top-level visitor will fail. 
- virtual Expected handle(TypeServer2Record &TS, - TypeVisitorCallbacks &Callbacks) { - return false; - } -}; - -} // end namespace codeview -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 3ad2b4e9c92f..d78fab47db66 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -19,7 +19,6 @@ namespace llvm { namespace codeview { class TypeIndex; -class TypeServerHandler; class TypeTableBuilder; /// \brief Merge one set of type records into another. This method assumes @@ -31,16 +30,13 @@ class TypeTableBuilder; /// type stream, that contains the index of the corresponding type record /// in the destination stream. /// -/// \param Handler (optional) If non-null, an interface that gets invoked -/// to handle type server records. -/// /// \param Types The collection of types to merge in. /// /// \returns Error::success() if the operation succeeded, otherwise an /// appropriate error code. Error mergeTypeRecords(TypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, - TypeServerHandler *Handler, const CVTypeArray &Types); + const CVTypeArray &Types); /// \brief Merge one set of id records into another. This method assumes /// that all records are id records, and there are no Type records present. @@ -65,7 +61,7 @@ Error mergeTypeRecords(TypeTableBuilder &Dest, /// appropriate error code. Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, SmallVectorImpl &SourceToDest, - const CVTypeArray &Ids); + const CVTypeArray &Ids); /// \brief Merge a unified set of type and id records, splitting them into /// separate output streams. 
@@ -78,9 +74,6 @@ Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, /// id stream, that contains the index of the corresponding id record /// in the destination stream. /// -/// \param Handler (optional) If non-null, an interface that gets invoked -/// to handle type server records. -/// /// \param IdsAndTypes The collection of id records to merge in. /// /// \returns Error::success() if the operation succeeded, otherwise an @@ -88,8 +81,7 @@ Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, - TypeServerHandler *Handler, - const CVTypeArray &IdsAndTypes); + const CVTypeArray &IdsAndTypes); } // end namespace codeview } // end namespace llvm diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index ea36ab7ab5b6..056c1b77c65d 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -238,6 +238,34 @@ class DWARFUnit { uint8_t getUnitType() const { return UnitType; } + static bool isValidUnitType(uint8_t UnitType) { + return UnitType == dwarf::DW_UT_compile || UnitType == dwarf::DW_UT_type || + UnitType == dwarf::DW_UT_partial || + UnitType == dwarf::DW_UT_skeleton || + UnitType == dwarf::DW_UT_split_compile || + UnitType == dwarf::DW_UT_split_type; + } + + /// \brief Return the number of bytes for the header of a unit of + /// UnitType type. + /// + /// This function must be called with a valid unit type which in + /// DWARF5 is defined as one of the following six types. 
+ static uint32_t getDWARF5HeaderSize(uint8_t UnitType) { + switch (UnitType) { + case dwarf::DW_UT_compile: + case dwarf::DW_UT_partial: + return 12; + case dwarf::DW_UT_skeleton: + case dwarf::DW_UT_split_compile: + return 20; + case dwarf::DW_UT_type: + case dwarf::DW_UT_split_type: + return 24; + } + llvm_unreachable("Invalid UnitType."); + } + uint64_t getBaseAddress() const { return BaseAddr; } void setBaseAddress(uint64_t base_addr) { diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 9eb5c45faba8..c0291a83ed97 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -21,6 +21,7 @@ class DWARFContext; class DWARFDie; class DWARFUnit; class DWARFAcceleratorTable; +class DWARFDataExtractor; /// A class that verifies DWARF debug information given a DWARF Context. class DWARFVerifier { @@ -30,10 +31,35 @@ class DWARFVerifier { /// can verify each reference points to a valid DIE and not an offset that /// lies between to valid DIEs. std::map> ReferenceToDIEOffsets; - uint32_t NumDebugInfoErrors = 0; uint32_t NumDebugLineErrors = 0; uint32_t NumAppleNamesErrors = 0; + /// Verifies the header of a unit in the .debug_info section. + /// + /// This function currently checks for: + /// - Unit is in 32-bit DWARF format. The function can be modified to + /// support 64-bit format. + /// - The DWARF version is valid + /// - The unit type is valid (if unit is in version >=5) + /// - The unit doesn't extend beyond .debug_info section + /// - The address size is valid + /// - The offset in the .debug_abbrev section is valid + /// + /// \param DebugInfoData The .debug_info section data + /// \param Offset A reference to the offset start of the unit. 
The offset will + /// be updated to point to the next unit in .debug_info + /// \param UnitIndex The index of the unit to be verified + /// \param UnitType A reference to the type of the unit + /// \param isUnitDWARF64 A reference to a flag that shows whether the unit is + /// in 64-bit format. + /// + /// \returns true if the header is verified successfully, false otherwise. + bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, + uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType, + bool &isUnitDWARF64); + + + bool verifyUnitContents(DWARFUnit Unit); /// Verifies the attribute's DWARF attribute and its value. /// /// This function currently checks for: @@ -42,7 +68,11 @@ class DWARFVerifier { /// /// \param Die The DWARF DIE that owns the attribute value /// \param AttrValue The DWARF attribute value to check - void verifyDebugInfoAttribute(const DWARFDie &Die, DWARFAttribute &AttrValue); + /// + /// \returns NumErrors The number of errors occured during verification of + /// attributes' values in a .debug_info section unit + unsigned verifyDebugInfoAttribute(const DWARFDie &Die, + DWARFAttribute &AttrValue); /// Verifies the attribute's DWARF form. /// @@ -53,7 +83,10 @@ class DWARFVerifier { /// /// \param Die The DWARF DIE that owns the attribute value /// \param AttrValue The DWARF attribute value to check - void verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); + /// + /// \returns NumErrors The number of errors occured during verification of + /// attributes' forms in a .debug_info section unit + unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); /// Verifies the all valid references that were found when iterating through /// all of the DIE attributes. @@ -62,7 +95,10 @@ class DWARFVerifier { /// offset matches. This helps to ensure if a DWARF link phase moved things /// around, that it doesn't create invalid references by failing to relocate /// CU relative and absolute references. 
- void verifyDebugInfoReferences(); + /// + /// \returns NumErrors The number of errors occured during verification of + /// references for the .debug_info section + unsigned verifyDebugInfoReferences(); /// Verify the the DW_AT_stmt_list encoding and value and ensure that no /// compile units that have the same DW_AT_stmt_list value. diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h index 3710eb29e7f9..d37b48540ffa 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h @@ -106,7 +106,7 @@ class DIARawSymbol : public IPDBRawSymbol { getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; - PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; int32_t getOffset() const override; int32_t getThisAdjust() const override; int32_t getVirtualBasePointerOffset() const override; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h index 466cb455651b..03205a986f1a 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h @@ -19,6 +19,7 @@ namespace pdb { enum class generic_error_code { invalid_path = 1, dia_sdk_not_present, + type_server_not_found, unspecified, }; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h index fab086c62c72..eefc36518728 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h @@ -118,7 +118,7 @@ class IPDBRawSymbol { virtual uint32_t getVirtualTableShapeId() const = 0; virtual PDB_DataKind getDataKind() const = 0; virtual PDB_SymType getSymTag() const = 0; - virtual PDB_UniqueId getGuid() const = 0; + 
virtual codeview::GUID getGuid() const = 0; virtual int32_t getOffset() const = 0; virtual int32_t getThisAdjust() const = 0; virtual int32_t getVirtualBasePointerOffset() const = 0; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/Formatters.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/Formatters.h index 183f0ad8307e..7d5eab2e2a09 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/Formatters.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/Formatters.h @@ -23,13 +23,6 @@ break; namespace llvm { -template <> struct format_provider { - static void format(const pdb::PDB_UniqueId &V, llvm::raw_ostream &Stream, - StringRef Style) { - codeview::fmt_guid(V.Guid).format(Stream, Style); - } -}; - template <> struct format_provider { static void format(const pdb::PdbRaw_ImplVer &V, llvm::raw_ostream &Stream, StringRef Style) { diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h index 37bf5f3b573c..fb8271cb5ebc 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h @@ -12,6 +12,7 @@ #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" @@ -39,7 +40,7 @@ class InfoStream { PdbRaw_ImplVer getVersion() const; uint32_t getSignature() const; uint32_t getAge() const; - PDB_UniqueId getGuid() const; + codeview::GUID getGuid() const; uint32_t getNamedStreamMapByteSize() const; PdbRaw_Features getFeatures() const; @@ -71,7 +72,7 @@ class InfoStream { // Due to the aforementioned limitations with `Signature`, this is a new // signature present on VC70 and higher PDBs which is guaranteed to be // universally unique. 
- PDB_UniqueId Guid; + codeview::GUID Guid; BinarySubstreamRef SubNamedStreams; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h index 90c28a90d252..c6cb0e221e70 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h @@ -37,7 +37,7 @@ class InfoStreamBuilder { void setVersion(PdbRaw_ImplVer V); void setSignature(uint32_t S); void setAge(uint32_t A); - void setGuid(PDB_UniqueId G); + void setGuid(codeview::GUID G); void addFeature(PdbRaw_FeatureSig Sig); uint32_t finalize(); @@ -54,7 +54,7 @@ class InfoStreamBuilder { PdbRaw_ImplVer Ver; uint32_t Sig; uint32_t Age; - PDB_UniqueId Guid; + codeview::GUID Guid; NamedStreamMap &NamedStreams; }; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h index ddb7f811da38..587c7ff2b092 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h @@ -27,7 +27,7 @@ class NativeExeSymbol : public NativeRawSymbol { uint32_t getAge() const override; std::string getSymbolsFileName() const override; - PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; bool hasCTypes() const override; bool hasPrivateSymbols() const override; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h index 66a9eae28e23..2c6548dcce21 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h @@ -111,7 +111,7 @@ class NativeRawSymbol : public IPDBRawSymbol { getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; 
- PDB_UniqueId getGuid() const override; + codeview::GUID getGuid() const override; int32_t getOffset() const override; int32_t getThisAdjust() const override; int32_t getVirtualBasePointerOffset() const override; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h deleted file mode 100644 index 196ba4d6ffbd..000000000000 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h +++ /dev/null @@ -1,46 +0,0 @@ -//===- PDBTypeServerHandler.h -----------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H -#define LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H - -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/PDBTypes.h" - -#include -#include - -namespace llvm { -namespace pdb { -class NativeSession; - -class PDBTypeServerHandler : public codeview::TypeServerHandler { -public: - PDBTypeServerHandler(bool RevisitAlways = false); - - void addSearchPath(StringRef Path); - Expected handle(codeview::TypeServer2Record &TS, - codeview::TypeVisitorCallbacks &Callbacks) override; - -private: - Expected handleInternal(PDBFile &File, - codeview::TypeVisitorCallbacks &Callbacks); - - bool RevisitAlways; - std::unique_ptr Session; - StringSet<> SearchPaths; -}; -} -} - -#endif diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h index a3cdd3f09a44..b6321cbf45a8 100644 --- 
a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -10,6 +10,7 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H #define LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Endian.h" @@ -268,17 +269,6 @@ struct PublicsStreamHeader { support::ulittle32_t NumSections; }; -/// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but -/// is abstracted here for the purposes of non-Windows platforms that don't have -/// the GUID structure defined. -struct PDB_UniqueId { - uint8_t Guid[16]; -}; - -inline bool operator==(const PDB_UniqueId &LHS, const PDB_UniqueId &RHS) { - return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)); -} - // The header preceeding the global TPI stream. // This corresponds to `HDR` in PDB/dbi/tpi.h. struct TpiStreamHeader { @@ -312,7 +302,7 @@ struct InfoStreamHeader { support::ulittle32_t Version; support::ulittle32_t Signature; support::ulittle32_t Age; - PDB_UniqueId Guid; + codeview::GUID Guid; }; /// The header preceeding the /names stream. 
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h index 156abb59a6be..c1edec7a26fe 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h @@ -10,84 +10,13 @@ #ifndef LLVM_DEBUGINFO_PDB_TPIHASHING_H #define LLVM_DEBUGINFO_PDB_TPIHASHING_H -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include -#include namespace llvm { namespace pdb { -class TpiHashUpdater : public codeview::TypeVisitorCallbacks { -public: - TpiHashUpdater() = default; - -#define TYPE_RECORD(EnumName, EnumVal, Name) \ - virtual Error visitKnownRecord(codeview::CVType &CVR, \ - codeview::Name##Record &Record) override { \ - visitKnownRecordImpl(CVR, Record); \ - return Error::success(); \ - } -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD(EnumName, EnumVal, Name) -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" - -private: - template - void visitKnownRecordImpl(codeview::CVType &CVR, RecordKind &Record) { - CVR.Hash = 0; - } - - void visitKnownRecordImpl(codeview::CVType &CVR, - codeview::UdtSourceLineRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, - codeview::UdtModSourceLineRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::ClassRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::EnumRecord &Rec); - void visitKnownRecordImpl(codeview::CVType &CVR, codeview::UnionRecord &Rec); -}; - -class TpiHashVerifier : public 
codeview::TypeVisitorCallbacks { -public: - TpiHashVerifier(FixedStreamArray &HashValues, - uint32_t NumHashBuckets) - : HashValues(HashValues), NumHashBuckets(NumHashBuckets) {} - - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UdtSourceLineRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UdtModSourceLineRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::ClassRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::EnumRecord &Rec) override; - Error visitKnownRecord(codeview::CVType &CVR, - codeview::UnionRecord &Rec) override; - Error visitTypeBegin(codeview::CVType &CVR) override; - -private: - Error verifySourceLine(codeview::TypeIndex TI); - - Error errorInvalidHash() { - return make_error( - raw_error_code::invalid_tpi_hash, - "Type index is 0x" + - utohexstr(codeview::TypeIndex::FirstNonSimpleIndex + Index)); - } - - FixedStreamArray HashValues; - codeview::CVType RawRecord; - uint32_t NumHashBuckets; - uint32_t Index = -1; -}; +Expected hashTypeRecord(const llvm::codeview::CVType &Type); } // end namespace pdb } // end namespace llvm diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h index 3a38f21b94c8..778121c8eb79 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h @@ -32,7 +32,6 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum); raw_ostream &operator<<(raw_ostream &OS, const PDB_Lang &Lang); raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag); raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access); -raw_ostream &operator<<(raw_ostream &OS, const PDB_UniqueId &Guid); raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type); raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine); diff --git 
a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index c1acca386820..27b5457fc8ff 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -22,6 +22,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -289,21 +290,21 @@ class CompileOnDemandLayer { // FIXME: We should track and free associated resources (unused compile // callbacks, uncompiled IR, and no-longer-needed/reachable function // implementations). - // FIXME: Return Error once the JIT APIs are Errorized. - bool updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) { + Error updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) { //Find out which logical dylib contains our symbol auto LDI = LogicalDylibs.begin(); for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) { - if (auto LMResources = LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) { + if (auto LMResources = + LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) { Module &SrcM = LMResources->SourceModule->getResource(); std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout()); - if (auto EC = LMResources->StubsMgr->updatePointer(CalledFnName, FnBodyAddr)) - return false; - else - return true; + if (auto Err = LMResources->StubsMgr->updatePointer(CalledFnName, + FnBodyAddr)) + return Err; + return Error::success(); } } - return false; + return make_error(FuncName); } private: @@ -363,11 +364,8 @@ class CompileOnDemandLayer { }); } - auto EC = LD.StubsMgr->createStubs(StubInits); - (void)EC; - // FIXME: This should be propagated back to the user. 
Stub creation may - // fail for remote JITs. - assert(!EC && "Error generating stubs"); + if (auto Err = LD.StubsMgr->createStubs(StubInits)) + return Err; } // If this module doesn't contain any globals, aliases, or module flags then diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index a9778514b9f1..0c1862c5c3ea 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -135,12 +135,13 @@ class RTDyldMemoryManager : public MCJITMemoryManager, virtual void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true); -private: +protected: struct EHFrame { uint8_t *Addr; size_t Size; }; - std::vector EHFrames; + typedef std::vector EHFrameInfos; + EHFrameInfos EHFrames; }; // Create wrappers for C Binding types (see CBindingWrapping.h). diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h index 801e88aba4d1..850964afc307 100644 --- a/contrib/llvm/include/llvm/IR/CallingConv.h +++ b/contrib/llvm/include/llvm/IR/CallingConv.h @@ -143,11 +143,15 @@ namespace CallingConv { /// System V ABI, used on most non-Windows systems. X86_64_SysV = 78, - /// \brief The C convention as implemented on Windows/x86-64. This - /// convention differs from the more common \c X86_64_SysV convention - /// in a number of ways, most notably in that XMM registers used to pass - /// arguments are shadowed by GPRs, and vice versa. - X86_64_Win64 = 79, + /// \brief The C convention as implemented on Windows/x86-64 and + /// AArch64. This convention differs from the more common + /// \c X86_64_SysV convention in a number of ways, most notably in + /// that XMM registers used to pass arguments are shadowed by GPRs, + /// and vice versa. 
+ /// On AArch64, this is identical to the normal C (AAPCS) calling + /// convention for normal functions, but floats are passed in integer + /// registers to variadic functions. + Win64 = 79, /// \brief MSVC calling convention that passes vectors and vector aggregates /// in SSE registers. diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h index 2e72c41ccee3..0094fd54992a 100644 --- a/contrib/llvm/include/llvm/IR/Constants.h +++ b/contrib/llvm/include/llvm/IR/Constants.h @@ -598,6 +598,10 @@ class ConstantDataSequential : public ConstantData { /// specified element in the low bits of a uint64_t. uint64_t getElementAsInteger(unsigned i) const; + /// If this is a sequential container of integers (of any size), return the + /// specified element as an APInt. + APInt getElementAsAPInt(unsigned i) const; + /// If this is a sequential container of floating point type, return the /// specified element as an APFloat. APFloat getElementAsAPFloat(unsigned i) const; @@ -761,6 +765,10 @@ class ConstantDataVector final : public ConstantDataSequential { /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// Returns true if this is a splat constant, meaning that all elements have + /// the same value. + bool isSplat() const; + /// If this is a splat constant, meaning that all of the elements have the /// same value, return that value. Otherwise return NULL. Constant *getSplatValue() const; diff --git a/contrib/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm/include/llvm/IR/DIBuilder.h index 8e6bb4baccaf..6a14f783005d 100644 --- a/contrib/llvm/include/llvm/IR/DIBuilder.h +++ b/contrib/llvm/include/llvm/IR/DIBuilder.h @@ -674,32 +674,37 @@ namespace llvm { /// Create a descriptor for an imported module. 
/// \param Context The scope this module is imported into - /// \param NS The namespace being imported here - /// \param Line Line number + /// \param NS The namespace being imported here. + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS, - unsigned Line); + DIFile *File, unsigned Line); /// Create a descriptor for an imported module. - /// \param Context The scope this module is imported into - /// \param NS An aliased namespace - /// \param Line Line number + /// \param Context The scope this module is imported into. + /// \param NS An aliased namespace. + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. DIImportedEntity *createImportedModule(DIScope *Context, - DIImportedEntity *NS, unsigned Line); + DIImportedEntity *NS, DIFile *File, + unsigned Line); /// Create a descriptor for an imported module. - /// \param Context The scope this module is imported into - /// \param M The module being imported here - /// \param Line Line number + /// \param Context The scope this module is imported into. + /// \param M The module being imported here + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M, - unsigned Line); + DIFile *File, unsigned Line); /// Create a descriptor for an imported function. - /// \param Context The scope this module is imported into - /// \param Decl The declaration (or definition) of a function, type, or - /// variable - /// \param Line Line number + /// \param Context The scope this module is imported into. + /// \param Decl The declaration (or definition) of a function, type, or + /// variable. + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. 
DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl, - unsigned Line, + DIFile *File, unsigned Line, StringRef Name = ""); /// Insert a new llvm.dbg.declare intrinsic call. diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h index 9374fe4fae76..678a43ae7926 100644 --- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -435,10 +435,10 @@ class DIScope : public DINode { /// Return the raw underlying file. /// - /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file - /// (it\em is the file). If \c this is an \a DIFile, we need to return \c - /// this. Otherwise, return the first operand, which is where all other - /// subclasses store their file pointer. + /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file (it + /// \em is the file). If \c this is an \a DIFile, we need to return \c this. + /// Otherwise, return the first operand, which is where all other subclasses + /// store their file pointer. Metadata *getRawFile() const { return isa(this) ? 
const_cast(this) : static_cast(getOperand(0)); @@ -2551,32 +2551,32 @@ class DIImportedEntity : public DINode { static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag, DIScope *Scope, DINodeRef Entity, - unsigned Line, StringRef Name, + DIFile *File, unsigned Line, StringRef Name, StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Tag, Scope, Entity, Line, + return getImpl(Context, Tag, Scope, Entity, File, Line, getCanonicalMDString(Context, Name), Storage, ShouldCreate); } static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity, - unsigned Line, MDString *Name, - StorageType Storage, + Metadata *File, unsigned Line, + MDString *Name, StorageType Storage, bool ShouldCreate = true); TempDIImportedEntity cloneImpl() const { return getTemporary(getContext(), getTag(), getScope(), getEntity(), - getLine(), getName()); + getFile(), getLine(), getName()); } public: DEFINE_MDNODE_GET(DIImportedEntity, (unsigned Tag, DIScope *Scope, DINodeRef Entity, - unsigned Line, StringRef Name = ""), - (Tag, Scope, Entity, Line, Name)) + DIFile *File, unsigned Line, StringRef Name = ""), + (Tag, Scope, Entity, File, Line, Name)) DEFINE_MDNODE_GET(DIImportedEntity, (unsigned Tag, Metadata *Scope, Metadata *Entity, - unsigned Line, MDString *Name), - (Tag, Scope, Entity, Line, Name)) + Metadata *File, unsigned Line, MDString *Name), + (Tag, Scope, Entity, File, Line, Name)) TempDIImportedEntity clone() const { return cloneImpl(); } @@ -2584,10 +2584,12 @@ class DIImportedEntity : public DINode { DIScope *getScope() const { return cast_or_null(getRawScope()); } DINodeRef getEntity() const { return DINodeRef(getRawEntity()); } StringRef getName() const { return getStringOperand(2); } + DIFile *getFile() const { return cast_or_null(getRawFile()); } Metadata *getRawScope() const { return getOperand(0); } Metadata *getRawEntity() const { return getOperand(1); } MDString *getRawName() const { return 
getOperandAs(2); } + Metadata *getRawFile() const { return getOperand(3); } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DIImportedEntityKind; diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h index e10d14c19793..5b21a2c83e4a 100644 --- a/contrib/llvm/include/llvm/IR/Dominators.h +++ b/contrib/llvm/include/llvm/IR/Dominators.h @@ -34,22 +34,31 @@ class Module; class raw_ostream; extern template class DomTreeNodeBase; -extern template class DominatorTreeBase; +extern template class DominatorTreeBase; // DomTree +extern template class DominatorTreeBase; // PostDomTree namespace DomTreeBuilder { -extern template void Calculate( - DominatorTreeBaseByGraphTraits> &DT, Function &F); +using BBDomTree = DomTreeBase; +using BBPostDomTree = PostDomTreeBase; -extern template void Calculate>( - DominatorTreeBaseByGraphTraits>> &DT, - Function &F); +extern template void Calculate(BBDomTree &DT, Function &F); +extern template void Calculate(BBPostDomTree &DT, + Function &F); -extern template bool Verify( - const DominatorTreeBaseByGraphTraits> &DT); +extern template void InsertEdge(BBDomTree &DT, BasicBlock *From, + BasicBlock *To); +extern template void InsertEdge(BBPostDomTree &DT, + BasicBlock *From, + BasicBlock *To); -extern template bool Verify>( - const DominatorTreeBaseByGraphTraits>> - &DT); +extern template void DeleteEdge(BBDomTree &DT, BasicBlock *From, + BasicBlock *To); +extern template void DeleteEdge(BBPostDomTree &DT, + BasicBlock *From, + BasicBlock *To); + +extern template bool Verify(const BBDomTree &DT); +extern template bool Verify(const BBPostDomTree &DT); } // namespace DomTreeBuilder using DomTreeNode = DomTreeNodeBase; @@ -122,14 +131,12 @@ template <> struct DenseMapInfo { /// the dominator tree is initially constructed may still exist in the tree, /// even if the tree is properly updated. 
Calling code should not rely on the /// preceding statements; this is stated only to assist human understanding. -class DominatorTree : public DominatorTreeBase { -public: - using Base = DominatorTreeBase; +class DominatorTree : public DominatorTreeBase { + public: + using Base = DominatorTreeBase; - DominatorTree() : DominatorTreeBase(false) {} - explicit DominatorTree(Function &F) : DominatorTreeBase(false) { - recalculate(F); - } + DominatorTree() = default; + explicit DominatorTree(Function &F) { recalculate(F); } /// Handle invalidation explicitly. bool invalidate(Function &F, const PreservedAnalyses &PA, diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td index 8ac56e03be6a..098245344725 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -32,16 +32,6 @@ class Hexagon_qi_mem_Intrinsic : Hexagon_Intrinsic; - -// -// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) -> -// Hexagon_void_si_Intrinsic -// -class Hexagon_void_si_Intrinsic - : Hexagon_Intrinsic; - // // DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) -> // Hexagon_hi_si_Intrinsic @@ -4959,11 +4949,25 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">; // def int_hexagon_S2_deinterleave : Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">; + // // BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1) // def int_hexagon_prefetch : -Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">; +Hexagon_Intrinsic<"HEXAGON_prefetch", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dccleana : +Hexagon_Intrinsic<"HEXAGON_Y2_dccleana", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dccleaninva : +Hexagon_Intrinsic<"HEXAGON_Y2_dccleaninva", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dcinva : +Hexagon_Intrinsic<"HEXAGON_Y2_dcinva", [], [llvm_ptr_ty], []>; +def int_hexagon_Y2_dczeroa : +Hexagon_Intrinsic<"HEXAGON_Y2_dczeroa", [], [llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly, IntrHasSideEffects]>; +def 
int_hexagon_Y4_l2fetch : +Hexagon_Intrinsic<"HEXAGON_Y4_l2fetch", [], [llvm_ptr_ty, llvm_i32_ty], []>; +def int_hexagon_Y5_l2fetch : +Hexagon_Intrinsic<"HEXAGON_Y5_l2fetch", [], [llvm_ptr_ty, llvm_i64_ty], []>; def llvm_ptr32_ty : LLVMPointerType; def llvm_ptr64_ty : LLVMPointerType; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td index 9be37d3645b2..98065bc51d99 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td @@ -373,6 +373,49 @@ let TargetPrefix = "s390" in { def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + + // Instructions from the Vector Enhancements Facility 1 + def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty, + llvm_v16i8_ty>; + + def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_s390_vfcesbs : SystemZBinaryConvCC; + def int_s390_vfchsbs : SystemZBinaryConvCC; + def int_s390_vfchesbs : SystemZBinaryConvCC; + + def int_s390_vftcisb : SystemZBinaryConvIntCC; + + def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + + // Instructions from the Vector Packed Decimal Facility + def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">, + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + 
+ def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, + Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], + // In fact write-only but there's no property + // for that. + [IntrArgMemOnly]>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/include/llvm/MC/LaneBitmask.h b/contrib/llvm/include/llvm/MC/LaneBitmask.h index 5ca06d1148e2..73b987b074db 100644 --- a/contrib/llvm/include/llvm/MC/LaneBitmask.h +++ b/contrib/llvm/include/llvm/MC/LaneBitmask.h @@ -75,6 +75,9 @@ namespace llvm { static LaneBitmask getNone() { return LaneBitmask(0); } static LaneBitmask getAll() { return ~LaneBitmask(0); } + static LaneBitmask getLane(unsigned Lane) { + return LaneBitmask(Type(1) << Lane); + } private: Type Mask = 0; diff --git a/contrib/llvm/include/llvm/MC/MCFixup.h b/contrib/llvm/include/llvm/MC/MCFixup.h index b493ca0b0ea7..b83086c327f2 100644 --- a/contrib/llvm/include/llvm/MC/MCFixup.h +++ b/contrib/llvm/include/llvm/MC/MCFixup.h @@ -69,7 +69,7 @@ class MCFixup { /// an instruction or an assembler directive. const MCExpr *Value; - /// The byte index of start of the relocation inside the encoded instruction. + /// The byte index of start of the relocation inside the MCFragment. uint32_t Offset; /// The target dependent kind of fixup item this is. The kind is used to diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h index 340d8253b8c9..9150a8b5c80a 100644 --- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h +++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h @@ -209,6 +209,15 @@ class MCInstrDesc { /// well. 
unsigned getNumOperands() const { return NumOperands; } + using const_opInfo_iterator = const MCOperandInfo *; + + const_opInfo_iterator opInfo_begin() const { return OpInfo; } + const_opInfo_iterator opInfo_end() const { return OpInfo + NumOperands; } + + iterator_range operands() const { + return make_range(opInfo_begin(), opInfo_end()); + } + /// \brief Return the number of MachineOperands that are register /// definitions. Register definitions always occur at the start of the /// machine operand list. This is the number of "outs" in the .td file, diff --git a/contrib/llvm/include/llvm/Object/COFFImportFile.h b/contrib/llvm/include/llvm/Object/COFFImportFile.h index 060f965233e1..8e215b565fc4 100644 --- a/contrib/llvm/include/llvm/Object/COFFImportFile.h +++ b/contrib/llvm/include/llvm/Object/COFFImportFile.h @@ -95,7 +95,7 @@ struct COFFShortExport { } }; -std::error_code writeImportLibrary(StringRef DLLName, +std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, COFF::MachineTypes Machine); diff --git a/contrib/llvm/include/llvm/Object/COFFModuleDefinition.h b/contrib/llvm/include/llvm/Object/COFFModuleDefinition.h index a0e8eacdb7a3..be139a2833b0 100644 --- a/contrib/llvm/include/llvm/Object/COFFModuleDefinition.h +++ b/contrib/llvm/include/llvm/Object/COFFModuleDefinition.h @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H #define LLVM_OBJECT_COFF_MODULE_DEFINITION_H @@ -29,6 +28,7 @@ namespace object { struct COFFModuleDefinition { std::vector Exports; std::string OutputFile; + std::string ImportName; uint64_t ImageBase = 0; uint64_t StackReserve = 0; uint64_t StackCommit = 0; @@ -40,8 +40,12 @@ struct COFFModuleDefinition { uint32_t MinorOSVersion = 0; }; +// mingw and wine def files do not mangle _ for x86 which +// is a consequence of legacy binutils' dlltool functionality. 
+// This MingwDef flag should be removed once mingw stops this pratice. Expected -parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine); +parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, + bool MingwDef = false); } // End namespace object. } // End namespace llvm. diff --git a/contrib/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/contrib/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypes.h index 6746fd60b6cb..88a5668f0a14 100644 --- a/contrib/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypes.h +++ b/contrib/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypes.h @@ -60,6 +60,8 @@ ArrayRef toDebugT(ArrayRef, BumpPtrAllocator &Alloc); } // end namespace llvm +LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, true) + LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord) LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord) diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def index 8eccebcd932a..09f9602a24d9 100644 --- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def @@ -43,8 +43,9 @@ AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/contrib/llvm/include/llvm/Support/BinaryItemStream.h b/contrib/llvm/include/llvm/Support/BinaryItemStream.h index 
f4b319217819..fe7e6caeaafb 100644 --- a/contrib/llvm/include/llvm/Support/BinaryItemStream.h +++ b/contrib/llvm/include/llvm/Support/BinaryItemStream.h @@ -62,32 +62,45 @@ class BinaryItemStream : public BinaryStream { return Error::success(); } - void setItems(ArrayRef ItemArray) { Items = ItemArray; } + void setItems(ArrayRef ItemArray) { + Items = ItemArray; + computeItemOffsets(); + } uint32_t getLength() override { - uint32_t Size = 0; - for (const auto &Item : Items) - Size += Traits::length(Item); - return Size; + return ItemEndOffsets.empty() ? 0 : ItemEndOffsets.back(); } private: - Expected translateOffsetIndex(uint32_t Offset) const { + void computeItemOffsets() { + ItemEndOffsets.clear(); + ItemEndOffsets.reserve(Items.size()); uint32_t CurrentOffset = 0; - uint32_t CurrentIndex = 0; for (const auto &Item : Items) { - if (CurrentOffset >= Offset) - break; - CurrentOffset += Traits::length(Item); - ++CurrentIndex; + uint32_t Len = Traits::length(Item); + assert(Len > 0 && "no empty items"); + CurrentOffset += Len; + ItemEndOffsets.push_back(CurrentOffset); } - if (CurrentOffset != Offset) + } + + Expected translateOffsetIndex(uint32_t Offset) { + // Make sure the offset is somewhere in our items array. + if (Offset >= getLength()) return make_error(stream_error_code::stream_too_short); - return CurrentIndex; + ++Offset; + auto Iter = + std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(), Offset); + size_t Idx = std::distance(ItemEndOffsets.begin(), Iter); + assert(Idx < Items.size() && "binary search for offset failed"); + return Idx; } llvm::support::endianness Endian; ArrayRef Items; + + // Sorted vector of offsets to accelerate lookup. 
+ std::vector ItemEndOffsets; }; } // end namespace llvm diff --git a/contrib/llvm/include/llvm/Support/Format.h b/contrib/llvm/include/llvm/Support/Format.h index 017b4973f1ff..bcbd2bec5722 100644 --- a/contrib/llvm/include/llvm/Support/Format.h +++ b/contrib/llvm/include/llvm/Support/Format.h @@ -125,30 +125,39 @@ inline format_object format(const char *Fmt, const Ts &... Vals) { return format_object(Fmt, Vals...); } -/// This is a helper class used for left_justify() and right_justify(). +/// This is a helper class for left_justify, right_justify, and center_justify. class FormattedString { +public: + enum Justification { JustifyNone, JustifyLeft, JustifyRight, JustifyCenter }; + FormattedString(StringRef S, unsigned W, Justification J) + : Str(S), Width(W), Justify(J) {} + +private: StringRef Str; unsigned Width; - bool RightJustify; + Justification Justify; friend class raw_ostream; - -public: - FormattedString(StringRef S, unsigned W, bool R) - : Str(S), Width(W), RightJustify(R) { } }; /// left_justify - append spaces after string so total output is /// \p Width characters. If \p Str is larger that \p Width, full string /// is written with no padding. inline FormattedString left_justify(StringRef Str, unsigned Width) { - return FormattedString(Str, Width, false); + return FormattedString(Str, Width, FormattedString::JustifyLeft); } /// right_justify - add spaces before string so total output is /// \p Width characters. If \p Str is larger that \p Width, full string /// is written with no padding. inline FormattedString right_justify(StringRef Str, unsigned Width) { - return FormattedString(Str, Width, true); + return FormattedString(Str, Width, FormattedString::JustifyRight); +} + +/// center_justify - add spaces before and after string so total output is +/// \p Width characters. If \p Str is larger that \p Width, full string +/// is written with no padding. 
+inline FormattedString center_justify(StringRef Str, unsigned Width) { + return FormattedString(Str, Width, FormattedString::JustifyCenter); } /// This is a helper class used for format_hex() and format_decimal(). diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h index 394a45387d8a..706320fed9a7 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTree.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h @@ -41,27 +41,21 @@ namespace llvm { -template class DominatorTreeBase; +template +class DominatorTreeBase; -namespace detail { - -template struct DominatorTreeBaseTraits { - static_assert(std::is_pointer::value, - "Currently NodeRef must be a pointer type."); - using type = DominatorTreeBase< - typename std::remove_pointer::type>; -}; - -} // end namespace detail - -template -using DominatorTreeBaseByGraphTraits = - typename detail::DominatorTreeBaseTraits::type; +namespace DomTreeBuilder { +template +struct SemiNCAInfo; +} // namespace DomTreeBuilder /// \brief Base class for the actual dominator tree node. template class DomTreeNodeBase { friend struct PostDominatorTree; - template friend class DominatorTreeBase; + friend class DominatorTreeBase; + friend class DominatorTreeBase; + friend struct DomTreeBuilder::SemiNCAInfo>; + friend struct DomTreeBuilder::SemiNCAInfo>; NodeT *TheBB; DomTreeNodeBase *IDom; @@ -192,58 +186,69 @@ void PrintDomTree(const DomTreeNodeBase *N, raw_ostream &O, } namespace DomTreeBuilder { -template -struct SemiNCAInfo; +// The routines below are provided in a separate header but referenced here. +template +void Calculate(DomTreeT &DT, FuncT &F); -// The calculate routine is provided in a separate header but referenced here. 
-template -void Calculate(DominatorTreeBaseByGraphTraits> &DT, FuncT &F); +template +void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To); -// The verify function is provided in a separate header but referenced here. -template -bool Verify(const DominatorTreeBaseByGraphTraits> &DT); +template +void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To); + +template +bool Verify(const DomTreeT &DT); } // namespace DomTreeBuilder /// \brief Core dominator tree base class. /// /// This class is a generic template over graph nodes. It is instantiated for /// various graphs in the LLVM IR or in the code generator. -template class DominatorTreeBase { +template +class DominatorTreeBase { protected: std::vector Roots; - bool IsPostDominators; using DomTreeNodeMapType = DenseMap>>; DomTreeNodeMapType DomTreeNodes; DomTreeNodeBase *RootNode; + using ParentPtr = decltype(std::declval()->getParent()); + ParentPtr Parent = nullptr; mutable bool DFSInfoValid = false; mutable unsigned int SlowQueries = 0; - friend struct DomTreeBuilder::SemiNCAInfo; - using SNCAInfoTy = DomTreeBuilder::SemiNCAInfo; + friend struct DomTreeBuilder::SemiNCAInfo; public: - explicit DominatorTreeBase(bool isPostDom) : IsPostDominators(isPostDom) {} + static_assert(std::is_pointer::NodeRef>::value, + "Currently DominatorTreeBase supports only pointer nodes"); + using NodeType = NodeT; + using NodePtr = NodeT *; + static constexpr bool IsPostDominator = IsPostDom; + + DominatorTreeBase() {} DominatorTreeBase(DominatorTreeBase &&Arg) : Roots(std::move(Arg.Roots)), - IsPostDominators(Arg.IsPostDominators), DomTreeNodes(std::move(Arg.DomTreeNodes)), - RootNode(std::move(Arg.RootNode)), - DFSInfoValid(std::move(Arg.DFSInfoValid)), - SlowQueries(std::move(Arg.SlowQueries)) { + RootNode(Arg.RootNode), + Parent(Arg.Parent), + DFSInfoValid(Arg.DFSInfoValid), + SlowQueries(Arg.SlowQueries) { Arg.wipe(); } DominatorTreeBase 
&operator=(DominatorTreeBase &&RHS) { Roots = std::move(RHS.Roots); - IsPostDominators = RHS.IsPostDominators; DomTreeNodes = std::move(RHS.DomTreeNodes); - RootNode = std::move(RHS.RootNode); - DFSInfoValid = std::move(RHS.DFSInfoValid); - SlowQueries = std::move(RHS.SlowQueries); + RootNode = RHS.RootNode; + Parent = RHS.Parent; + DFSInfoValid = RHS.DFSInfoValid; + SlowQueries = RHS.SlowQueries; RHS.wipe(); return *this; } @@ -259,11 +264,12 @@ template class DominatorTreeBase { /// isPostDominator - Returns true if analysis based of postdoms /// - bool isPostDominator() const { return IsPostDominators; } + bool isPostDominator() const { return IsPostDominator; } /// compare - Return false if the other dominator tree base matches this /// dominator tree base. Otherwise return true. bool compare(const DominatorTreeBase &Other) const { + if (Parent != Other.Parent) return true; const DomTreeNodeMapType &OtherDomTreeNodes = Other.DomTreeNodes; if (DomTreeNodes.size() != OtherDomTreeNodes.size()) @@ -443,10 +449,50 @@ template class DominatorTreeBase { const_cast(B)); } + bool isVirtualRoot(const DomTreeNodeBase *A) const { + return isPostDominator() && !A->getBlock(); + } + //===--------------------------------------------------------------------===// // API to update (Post)DominatorTree information based on modifications to // the CFG... + /// Inform the dominator tree about a CFG edge insertion and update the tree. + /// + /// This function has to be called just before or just after making the update + /// on the actual CFG. There cannot be any other updates that the dominator + /// tree doesn't know about. + /// + /// Note that for postdominators it automatically takes care of inserting + /// a reverse edge internally (so there's no need to swap the parameters). 
+ /// + void insertEdge(NodeT *From, NodeT *To) { + assert(From); + assert(To); + assert(From->getParent() == Parent); + assert(To->getParent() == Parent); + DomTreeBuilder::InsertEdge(*this, From, To); + } + + /// Inform the dominator tree about a CFG edge deletion and update the tree. + /// + /// This function has to be called just after making the update + /// on the actual CFG. There cannot be any other updates that the dominator + /// tree doesn't know about. The only exception is when the deletion that the + /// tree is informed about makes some (domominator) subtree unreachable -- in + /// this case, it is fine to perform deletions within this subtree. + /// + /// Note that for postdominators it automatically takes care of deleting + /// a reverse edge internally (so there's no need to swap the parameters). + /// + void deleteEdge(NodeT *From, NodeT *To) { + assert(From); + assert(To); + assert(From->getParent() == Parent); + assert(To->getParent() == Parent); + DomTreeBuilder::DeleteEdge(*this, From, To); + } + /// Add a new node to the dominator tree information. /// /// This creates a new node as a child of DomBB dominator node, linking it @@ -530,7 +576,7 @@ template class DominatorTreeBase { /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. 
void splitBlock(NodeT *NewBB) { - if (this->IsPostDominators) + if (IsPostDominator) Split>(NewBB); else Split(NewBB); @@ -607,37 +653,33 @@ template class DominatorTreeBase { template void recalculate(FT &F) { using TraitsTy = GraphTraits; reset(); + Parent = &F; - if (!this->IsPostDominators) { + if (!IsPostDominator) { // Initialize root NodeT *entry = TraitsTy::getEntryNode(&F); addRoot(entry); - - DomTreeBuilder::Calculate(*this, F); } else { // Initialize the roots list for (auto *Node : nodes(&F)) if (TraitsTy::child_begin(Node) == TraitsTy::child_end(Node)) addRoot(Node); - - DomTreeBuilder::Calculate>(*this, F); } + + DomTreeBuilder::Calculate(*this, F); } /// verify - check parent and sibling property - bool verify() const { - return this->isPostDominator() - ? DomTreeBuilder::Verify>(*this) - : DomTreeBuilder::Verify(*this); - } + bool verify() const { return DomTreeBuilder::Verify(*this); } protected: void addRoot(NodeT *BB) { this->Roots.push_back(BB); } void reset() { DomTreeNodes.clear(); - this->Roots.clear(); + Roots.clear(); RootNode = nullptr; + Parent = nullptr; DFSInfoValid = false; SlowQueries = 0; } @@ -719,13 +761,21 @@ template class DominatorTreeBase { void wipe() { DomTreeNodes.clear(); RootNode = nullptr; + Parent = nullptr; } }; +template +using DomTreeBase = DominatorTreeBase; + +template +using PostDomTreeBase = DominatorTreeBase; + // These two functions are declared out of line as a workaround for building // with old (< r147295) versions of clang because of pr11642. 
-template -bool DominatorTreeBase::dominates(const NodeT *A, const NodeT *B) const { +template +bool DominatorTreeBase::dominates(const NodeT *A, + const NodeT *B) const { if (A == B) return true; @@ -735,9 +785,9 @@ bool DominatorTreeBase::dominates(const NodeT *A, const NodeT *B) const { return dominates(getNode(const_cast(A)), getNode(const_cast(B))); } -template -bool DominatorTreeBase::properlyDominates(const NodeT *A, - const NodeT *B) const { +template +bool DominatorTreeBase::properlyDominates( + const NodeT *A, const NodeT *B) const { if (A == B) return false; diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h index a0fec668e05c..be90afa4c3c8 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -20,15 +20,28 @@ /// out that the theoretically slower O(n*log(n)) implementation is actually /// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs. /// +/// The file uses the Depth Based Search algorithm to perform incremental +/// upates (insertion and deletions). The implemented algorithm is based on this +/// publication: +/// +/// An Experimental Study of Dynamic Dominators +/// Loukas Georgiadis, et al., April 12 2016, pp. 5-7, 9-10: +/// https://arxiv.org/pdf/1604.02711.pdf +/// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H +#include +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/GenericDomTree.h" +#define DEBUG_TYPE "dom-tree-builder" + namespace llvm { namespace DomTreeBuilder { @@ -46,13 +59,14 @@ struct ChildrenGetter { } }; -// Information record used by Semi-NCA during tree construction. 
-template +template struct SemiNCAInfo { - using NodePtr = NodeT *; - using DomTreeT = DominatorTreeBase; + using NodePtr = typename DomTreeT::NodePtr; + using NodeT = typename DomTreeT::NodeType; using TreeNodePtr = DomTreeNodeBase *; + static constexpr bool IsPostDom = DomTreeT::IsPostDominator; + // Information record used by Semi-NCA during tree construction. struct InfoRec { unsigned DFSNum = 0; unsigned Parent = 0; @@ -62,11 +76,13 @@ struct SemiNCAInfo { SmallVector ReverseChildren; }; - std::vector NumToNode; + // Number to node mapping is 1-based. Initialize the mapping to start with + // a dummy element. + std::vector NumToNode = {nullptr}; DenseMap NodeToInfo; void clear() { - NumToNode.clear(); + NumToNode = {nullptr}; // Restore to initial state with a dummy start node. NodeToInfo.clear(); } @@ -90,12 +106,28 @@ struct SemiNCAInfo { // Add a new tree node for this NodeT, and link it as a child of // IDomNode return (DT.DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))) + llvm::make_unique>(BB, IDomNode))) .get(); } static bool AlwaysDescend(NodePtr, NodePtr) { return true; } + struct BlockNamePrinter { + NodePtr N; + + BlockNamePrinter(NodePtr Block) : N(Block) {} + BlockNamePrinter(TreeNodePtr TN) : N(TN ? TN->getBlock() : nullptr) {} + + friend raw_ostream &operator<<(raw_ostream &O, const BlockNamePrinter &BP) { + if (!BP.N) + O << "nullptr"; + else + BP.N->printAsOperand(O, false); + + return O; + } + }; + // Custom DFS implementation which can skip nodes based on a provided // predicate. It also collects ReverseChildren so that we don't have to spend // time getting predecessors in SemiNCA. @@ -177,44 +209,42 @@ struct SemiNCAInfo { return VInInfo.Label; } - template - void runSemiNCA(DomTreeT &DT, unsigned NumBlocks) { - // Step #1: Number blocks in depth-first order and initialize variables used - // in later stages of the algorithm. 
- const unsigned N = doFullDFSWalk(DT, AlwaysDescend); - - // It might be that some blocks did not get a DFS number (e.g., blocks of - // infinite loops). In these cases an artificial exit node is required. - const bool MultipleRoots = - DT.Roots.size() > 1 || (DT.isPostDominator() && N != NumBlocks); - + // This function requires DFS to be run before calling it. + void runSemiNCA(DomTreeT &DT, const unsigned MinLevel = 0) { + const unsigned NextDFSNum(NumToNode.size()); // Initialize IDoms to spanning tree parents. - for (unsigned i = 1; i <= N; ++i) { + for (unsigned i = 1; i < NextDFSNum; ++i) { const NodePtr V = NumToNode[i]; auto &VInfo = NodeToInfo[V]; VInfo.IDom = NumToNode[VInfo.Parent]; } - // Step #2: Calculate the semidominators of all vertices. - for (unsigned i = N; i >= 2; --i) { + // Step #1: Calculate the semidominators of all vertices. + for (unsigned i = NextDFSNum - 1; i >= 2; --i) { NodePtr W = NumToNode[i]; auto &WInfo = NodeToInfo[W]; // Initialize the semi dominator to point to the parent node. WInfo.Semi = WInfo.Parent; - for (const auto &N : WInfo.ReverseChildren) - if (NodeToInfo.count(N)) { // Only if this predecessor is reachable! - unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi; - if (SemiU < WInfo.Semi) - WInfo.Semi = SemiU; - } + for (const auto &N : WInfo.ReverseChildren) { + if (NodeToInfo.count(N) == 0) // Skip unreachable predecessors. + continue; + + const TreeNodePtr TN = DT.getNode(N); + // Skip predecessors whose level is above the subtree we are processing. + if (TN && TN->getLevel() < MinLevel) + continue; + + unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi; + if (SemiU < WInfo.Semi) WInfo.Semi = SemiU; + } } - // Step #3: Explicitly define the immediate dominator of each vertex. + // Step #2: Explicitly define the immediate dominator of each vertex. // IDom[i] = NCA(SDom[i], SpanningTreeParent(i)). // Note that the parents were stored in IDoms and later got invalidated // during path compression in Eval. 
- for (unsigned i = 2; i <= N; ++i) { + for (unsigned i = 2; i < NextDFSNum; ++i) { const NodePtr W = NumToNode[i]; auto &WInfo = NodeToInfo[W]; const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum; @@ -224,46 +254,11 @@ struct SemiNCAInfo { WInfo.IDom = WIDomCandidate; } - - if (DT.Roots.empty()) return; - - // Add a node for the root. This node might be the actual root, if there is - // one exit block, or it may be the virtual exit (denoted by - // (BasicBlock *)0) which postdominates all real exits if there are multiple - // exit blocks, or an infinite loop. - NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr; - - DT.RootNode = - (DT.DomTreeNodes[Root] = - llvm::make_unique>(Root, nullptr)) - .get(); - - // Loop over all of the reachable blocks in the function... - for (unsigned i = 2; i <= N; ++i) { - NodePtr W = NumToNode[i]; - - // Don't replace this with 'count', the insertion side effect is important - if (DT.DomTreeNodes[W]) - continue; // Haven't calculated this node yet? - - NodePtr ImmDom = getIDom(W); - - assert(ImmDom || DT.DomTreeNodes[nullptr]); - - // Get or calculate the node for the immediate dominator - TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT); - - // Add a new tree node for this BasicBlock, and link it as a child of - // IDomNode - DT.DomTreeNodes[W] = IDomNode->addChild( - llvm::make_unique>(W, IDomNode)); - } } template unsigned doFullDFSWalk(const DomTreeT &DT, DescendCondition DC) { unsigned Num = 0; - NumToNode.push_back(nullptr); if (DT.Roots.size() > 1) { auto &BBInfo = NodeToInfo[nullptr]; @@ -283,11 +278,257 @@ struct SemiNCAInfo { return Num; } - static void PrintBlockOrNullptr(raw_ostream &O, NodePtr Obj) { - if (!Obj) - O << "nullptr"; + void calculateFromScratch(DomTreeT &DT, const unsigned NumBlocks) { + // Step #0: Number blocks in depth-first order and initialize variables used + // in later stages of the algorithm. 
+ const unsigned LastDFSNum = doFullDFSWalk(DT, AlwaysDescend); + + runSemiNCA(DT); + + if (DT.Roots.empty()) return; + + // Add a node for the root. This node might be the actual root, if there is + // one exit block, or it may be the virtual exit (denoted by + // (BasicBlock *)0) which postdominates all real exits if there are multiple + // exit blocks, or an infinite loop. + // It might be that some blocks did not get a DFS number (e.g., blocks of + // infinite loops). In these cases an artificial exit node is required. + const bool MultipleRoots = DT.Roots.size() > 1 || (DT.isPostDominator() && + LastDFSNum != NumBlocks); + NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr; + + DT.RootNode = (DT.DomTreeNodes[Root] = + llvm::make_unique>(Root, nullptr)) + .get(); + attachNewSubtree(DT, DT.RootNode); + } + + void attachNewSubtree(DomTreeT& DT, const TreeNodePtr AttachTo) { + // Attach the first unreachable block to AttachTo. + NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); + // Loop over all of the discovered blocks in the function... + for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { + NodePtr W = NumToNode[i]; + DEBUG(dbgs() << "\tdiscovered a new reachable node " + << BlockNamePrinter(W) << "\n"); + + // Don't replace this with 'count', the insertion side effect is important + if (DT.DomTreeNodes[W]) continue; // Haven't calculated this node yet? 
+ + NodePtr ImmDom = getIDom(W); + + // Get or calculate the node for the immediate dominator + TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT); + + // Add a new tree node for this BasicBlock, and link it as a child of + // IDomNode + DT.DomTreeNodes[W] = IDomNode->addChild( + llvm::make_unique>(W, IDomNode)); + } + } + + void reattachExistingSubtree(DomTreeT &DT, const TreeNodePtr AttachTo) { + NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); + for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { + const NodePtr N = NumToNode[i]; + const TreeNodePtr TN = DT.getNode(N); + assert(TN); + const TreeNodePtr NewIDom = DT.getNode(NodeToInfo[N].IDom); + TN->setIDom(NewIDom); + } + } + + // Helper struct used during edge insertions. + struct InsertionInfo { + using BucketElementTy = std::pair; + struct DecreasingLevel { + bool operator()(const BucketElementTy &First, + const BucketElementTy &Second) const { + return First.first > Second.first; + } + }; + + std::priority_queue, + DecreasingLevel> + Bucket; // Queue of tree nodes sorted by level in descending order. + SmallDenseSet Affected; + SmallDenseSet Visited; + SmallVector AffectedQueue; + SmallVector VisitedNotAffectedQueue; + }; + + static void InsertEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) { + assert(From && To && "Cannot connect nullptrs"); + DEBUG(dbgs() << "Inserting edge " << BlockNamePrinter(From) << " -> " + << BlockNamePrinter(To) << "\n"); + const TreeNodePtr FromTN = DT.getNode(From); + + // Ignore edges from unreachable nodes. + if (!FromTN) return; + + DT.DFSInfoValid = false; + + const TreeNodePtr ToTN = DT.getNode(To); + if (!ToTN) + InsertUnreachable(DT, FromTN, To); else - Obj->printAsOperand(O, false); + InsertReachable(DT, FromTN, ToTN); + } + + // Handles insertion to a node already in the dominator tree. 
+ static void InsertReachable(DomTreeT &DT, const TreeNodePtr From, + const TreeNodePtr To) { + DEBUG(dbgs() << "\tReachable " << BlockNamePrinter(From->getBlock()) + << " -> " << BlockNamePrinter(To->getBlock()) << "\n"); + const NodePtr NCDBlock = + DT.findNearestCommonDominator(From->getBlock(), To->getBlock()); + assert(NCDBlock || DT.isPostDominator()); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + assert(NCD); + + DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n"); + const TreeNodePtr ToIDom = To->getIDom(); + + // Nothing affected -- NCA property holds. + // (Based on the lemma 2.5 from the second paper.) + if (NCD == To || NCD == ToIDom) return; + + // Identify and collect affected nodes. + InsertionInfo II; + DEBUG(dbgs() << "Marking " << BlockNamePrinter(To) << " as affected\n"); + II.Affected.insert(To); + const unsigned ToLevel = To->getLevel(); + DEBUG(dbgs() << "Putting " << BlockNamePrinter(To) << " into a Bucket\n"); + II.Bucket.push({ToLevel, To}); + + while (!II.Bucket.empty()) { + const TreeNodePtr CurrentNode = II.Bucket.top().second; + II.Bucket.pop(); + DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: " + << BlockNamePrinter(CurrentNode) << "\n"); + II.Visited.insert(CurrentNode); + II.AffectedQueue.push_back(CurrentNode); + + // Discover and collect affected successors of the current node. + VisitInsertion(DT, CurrentNode, CurrentNode->getLevel(), NCD, II); + } + + // Finish by updating immediate dominators and levels. + UpdateInsertion(DT, NCD, II); + } + + // Visits an affected node and collect its affected successors. 
+ static void VisitInsertion(DomTreeT &DT, const TreeNodePtr TN, + const unsigned RootLevel, const TreeNodePtr NCD, + InsertionInfo &II) { + const unsigned NCDLevel = NCD->getLevel(); + DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n"); + + assert(TN->getBlock()); + for (const NodePtr Succ : + ChildrenGetter::Get(TN->getBlock())) { + const TreeNodePtr SuccTN = DT.getNode(Succ); + assert(SuccTN && "Unreachable successor found at reachable insertion"); + const unsigned SuccLevel = SuccTN->getLevel(); + + DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) + << ", level = " << SuccLevel << "\n"); + + // Succ dominated by subtree From -- not affected. + // (Based on the lemma 2.5 from the second paper.) + if (SuccLevel > RootLevel) { + DEBUG(dbgs() << "\t\tDominated by subtree From\n"); + if (II.Visited.count(SuccTN) != 0) continue; + + DEBUG(dbgs() << "\t\tMarking visited not affected " + << BlockNamePrinter(Succ) << "\n"); + II.Visited.insert(SuccTN); + II.VisitedNotAffectedQueue.push_back(SuccTN); + VisitInsertion(DT, SuccTN, RootLevel, NCD, II); + } else if ((SuccLevel > NCDLevel + 1) && II.Affected.count(SuccTN) == 0) { + DEBUG(dbgs() << "\t\tMarking affected and adding " + << BlockNamePrinter(Succ) << " to a Bucket\n"); + II.Affected.insert(SuccTN); + II.Bucket.push({SuccLevel, SuccTN}); + } + } + } + + // Updates immediate dominators and levels after insertion. 
+ static void UpdateInsertion(DomTreeT &DT, const TreeNodePtr NCD, + InsertionInfo &II) { + DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n"); + + for (const TreeNodePtr TN : II.AffectedQueue) { + DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN) + << ") = " << BlockNamePrinter(NCD) << "\n"); + TN->setIDom(NCD); + } + + UpdateLevelsAfterInsertion(II); + } + + static void UpdateLevelsAfterInsertion(InsertionInfo &II) { + DEBUG(dbgs() << "Updating levels for visited but not affected nodes\n"); + + for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) { + DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = (" + << BlockNamePrinter(TN->getIDom()) << ") " + << TN->getIDom()->getLevel() << " + 1\n"); + TN->UpdateLevel(); + } + } + + // Handles insertion to previously unreachable nodes. + static void InsertUnreachable(DomTreeT &DT, const TreeNodePtr From, + const NodePtr To) { + DEBUG(dbgs() << "Inserting " << BlockNamePrinter(From) + << " -> (unreachable) " << BlockNamePrinter(To) << "\n"); + + // Collect discovered edges to already reachable nodes. + SmallVector, 8> DiscoveredEdgesToReachable; + // Discover and connect nodes that became reachable with the insertion. + ComputeUnreachableDominators(DT, To, From, DiscoveredEdgesToReachable); + + DEBUG(dbgs() << "Inserted " << BlockNamePrinter(From) + << " -> (prev unreachable) " << BlockNamePrinter(To) << "\n"); + + DEBUG(DT.print(dbgs())); + + // Used the discovered edges and inset discovered connecting (incoming) + // edges. + for (const auto &Edge : DiscoveredEdgesToReachable) { + DEBUG(dbgs() << "\tInserting discovered connecting edge " + << BlockNamePrinter(Edge.first) << " -> " + << BlockNamePrinter(Edge.second) << "\n"); + InsertReachable(DT, DT.getNode(Edge.first), Edge.second); + } + } + + // Connects nodes that become reachable with an insertion. 
+ static void ComputeUnreachableDominators( + DomTreeT &DT, const NodePtr Root, const TreeNodePtr Incoming, + SmallVectorImpl> + &DiscoveredConnectingEdges) { + assert(!DT.getNode(Root) && "Root must not be reachable"); + + // Visit only previously unreachable nodes. + auto UnreachableDescender = [&DT, &DiscoveredConnectingEdges](NodePtr From, + NodePtr To) { + const TreeNodePtr ToTN = DT.getNode(To); + if (!ToTN) return true; + + DiscoveredConnectingEdges.push_back({From, ToTN}); + return false; + }; + + SemiNCAInfo SNCA; + SNCA.runDFS(Root, 0, UnreachableDescender, 0); + SNCA.runSemiNCA(DT); + SNCA.attachNewSubtree(DT, Incoming); + + DEBUG(dbgs() << "After adding unreachable nodes\n"); + DEBUG(DT.print(dbgs())); } // Checks if the tree contains all reachable nodes in the input graph. @@ -298,12 +539,23 @@ struct SemiNCAInfo { for (auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr TN = NodeToTN.second.get(); const NodePtr BB = TN->getBlock(); - if (!BB) continue; + + // Virtual root has a corresponding virtual CFG node. + if (DT.isVirtualRoot(TN)) continue; if (NodeToInfo.count(BB) == 0) { - errs() << "DomTree node "; - PrintBlockOrNullptr(errs(), BB); - errs() << " not found by DFS walk!\n"; + errs() << "DomTree node " << BlockNamePrinter(BB) + << " not found by DFS walk!\n"; + errs().flush(); + + return false; + } + } + + for (const NodePtr N : NumToNode) { + if (N && !DT.getNode(N)) { + errs() << "CFG node " << BlockNamePrinter(N) + << " not found in the DomTree!\n"; errs().flush(); return false; @@ -313,6 +565,215 @@ struct SemiNCAInfo { return true; } + static void DeleteEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) { + assert(From && To && "Cannot disconnect nullptrs"); + DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> " + << BlockNamePrinter(To) << "\n"); + +#ifndef NDEBUG + // Ensure that the edge was in fact deleted from the CFG before informing + // the DomTree about it. 
+ // The check is O(N), so run it only in debug configuration. + auto IsSuccessor = [](const NodePtr SuccCandidate, const NodePtr Of) { + auto Successors = ChildrenGetter::Get(Of); + return llvm::find(Successors, SuccCandidate) != Successors.end(); + }; + (void)IsSuccessor; + assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!"); +#endif + + const TreeNodePtr FromTN = DT.getNode(From); + // Deletion in an unreachable subtree -- nothing to do. + if (!FromTN) return; + + const TreeNodePtr ToTN = DT.getNode(To); + assert(ToTN && "To already unreachable -- there is no edge to delete"); + const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + + // To dominates From -- nothing to do. + if (ToTN == NCD) return; + + const TreeNodePtr ToIDom = ToTN->getIDom(); + DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " + << BlockNamePrinter(ToIDom) << "\n"); + + // To remains reachable after deletion. + // (Based on the caption under Figure 4. from the second paper.) + if (FromTN != ToIDom || HasProperSupport(DT, ToTN)) + DeleteReachable(DT, FromTN, ToTN); + else + DeleteUnreachable(DT, ToTN); + } + + // Handles deletions that leave destination nodes reachable. + static void DeleteReachable(DomTreeT &DT, const TreeNodePtr FromTN, + const TreeNodePtr ToTN) { + DEBUG(dbgs() << "Deleting reachable " << BlockNamePrinter(FromTN) << " -> " + << BlockNamePrinter(ToTN) << "\n"); + DEBUG(dbgs() << "\tRebuilding subtree\n"); + + // Find the top of the subtree that needs to be rebuilt. + // (Based on the lemma 2.6 from the second paper.) + const NodePtr ToIDom = + DT.findNearestCommonDominator(FromTN->getBlock(), ToTN->getBlock()); + assert(ToIDom || DT.isPostDominator()); + const TreeNodePtr ToIDomTN = DT.getNode(ToIDom); + assert(ToIDomTN); + const TreeNodePtr PrevIDomSubTree = ToIDomTN->getIDom(); + // Top of the subtree to rebuild is the root node. Rebuild the tree from + // scratch. 
+ if (!PrevIDomSubTree) { + DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); + DT.recalculate(*DT.Parent); + return; + } + + // Only visit nodes in the subtree starting at To. + const unsigned Level = ToIDomTN->getLevel(); + auto DescendBelow = [Level, &DT](NodePtr, NodePtr To) { + return DT.getNode(To)->getLevel() > Level; + }; + + DEBUG(dbgs() << "\tTop of subtree: " << BlockNamePrinter(ToIDomTN) << "\n"); + + SemiNCAInfo SNCA; + SNCA.runDFS(ToIDom, 0, DescendBelow, 0); + DEBUG(dbgs() << "\tRunning Semi-NCA\n"); + SNCA.runSemiNCA(DT, Level); + SNCA.reattachExistingSubtree(DT, PrevIDomSubTree); + } + + // Checks if a node has proper support, as defined on the page 3 and later + // explained on the page 7 of the second paper. + static bool HasProperSupport(DomTreeT &DT, const TreeNodePtr TN) { + DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN) << "\n"); + for (const NodePtr Pred : + ChildrenGetter::Get(TN->getBlock())) { + DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n"); + if (!DT.getNode(Pred)) continue; + + const NodePtr Support = + DT.findNearestCommonDominator(TN->getBlock(), Pred); + DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n"); + if (Support != TN->getBlock()) { + DEBUG(dbgs() << "\t" << BlockNamePrinter(TN) + << " is reachable from support " + << BlockNamePrinter(Support) << "\n"); + return true; + } + } + + return false; + } + + // Handle deletions that make destination node unreachable. + // (Based on the lemma 2.7 from the second paper.) + static void DeleteUnreachable(DomTreeT &DT, const TreeNodePtr ToTN) { + DEBUG(dbgs() << "Deleting unreachable subtree " << BlockNamePrinter(ToTN) + << "\n"); + assert(ToTN); + assert(ToTN->getBlock()); + + SmallVector AffectedQueue; + const unsigned Level = ToTN->getLevel(); + + // Traverse destination node's descendants with greater level in the tree + // and collect visited nodes. 
+ auto DescendAndCollect = [Level, &AffectedQueue, &DT](NodePtr, NodePtr To) { + const TreeNodePtr TN = DT.getNode(To); + assert(TN); + if (TN->getLevel() > Level) return true; + if (llvm::find(AffectedQueue, To) == AffectedQueue.end()) + AffectedQueue.push_back(To); + + return false; + }; + + SemiNCAInfo SNCA; + unsigned LastDFSNum = + SNCA.runDFS(ToTN->getBlock(), 0, DescendAndCollect, 0); + + TreeNodePtr MinNode = ToTN; + + // Identify the top of the subtree to rebuilt by finding the NCD of all + // the affected nodes. + for (const NodePtr N : AffectedQueue) { + const TreeNodePtr TN = DT.getNode(N); + const NodePtr NCDBlock = + DT.findNearestCommonDominator(TN->getBlock(), ToTN->getBlock()); + assert(NCDBlock || DT.isPostDominator()); + const TreeNodePtr NCD = DT.getNode(NCDBlock); + assert(NCD); + + DEBUG(dbgs() << "Processing affected node " << BlockNamePrinter(TN) + << " with NCD = " << BlockNamePrinter(NCD) + << ", MinNode =" << BlockNamePrinter(MinNode) << "\n"); + if (NCD != TN && NCD->getLevel() < MinNode->getLevel()) MinNode = NCD; + } + + // Root reached, rebuild the whole tree from scratch. + if (!MinNode->getIDom()) { + DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); + DT.recalculate(*DT.Parent); + return; + } + + // Erase the unreachable subtree in reverse preorder to process all children + // before deleting their parent. + for (unsigned i = LastDFSNum; i > 0; --i) { + const NodePtr N = SNCA.NumToNode[i]; + const TreeNodePtr TN = DT.getNode(N); + DEBUG(dbgs() << "Erasing node " << BlockNamePrinter(TN) << "\n"); + + EraseNode(DT, TN); + } + + // The affected subtree start at the To node -- there's no extra work to do. 
+ if (MinNode == ToTN) return; + + DEBUG(dbgs() << "DeleteUnreachable: running DFS with MinNode = " + << BlockNamePrinter(MinNode) << "\n"); + const unsigned MinLevel = MinNode->getLevel(); + const TreeNodePtr PrevIDom = MinNode->getIDom(); + assert(PrevIDom); + SNCA.clear(); + + // Identify nodes that remain in the affected subtree. + auto DescendBelow = [MinLevel, &DT](NodePtr, NodePtr To) { + const TreeNodePtr ToTN = DT.getNode(To); + return ToTN && ToTN->getLevel() > MinLevel; + }; + SNCA.runDFS(MinNode->getBlock(), 0, DescendBelow, 0); + + DEBUG(dbgs() << "Previous IDom(MinNode) = " << BlockNamePrinter(PrevIDom) + << "\nRunning Semi-NCA\n"); + + // Rebuild the remaining part of affected subtree. + SNCA.runSemiNCA(DT, MinLevel); + SNCA.reattachExistingSubtree(DT, PrevIDom); + } + + // Removes leaf tree nodes from the dominator tree. + static void EraseNode(DomTreeT &DT, const TreeNodePtr TN) { + assert(TN); + assert(TN->getNumChildren() == 0 && "Not a tree leaf"); + + const TreeNodePtr IDom = TN->getIDom(); + assert(IDom); + + auto ChIt = llvm::find(IDom->Children, TN); + assert(ChIt != IDom->Children.end()); + std::swap(*ChIt, IDom->Children.back()); + IDom->Children.pop_back(); + + DT.DomTreeNodes.erase(TN->getBlock()); + } + + //~~ + //===--------------- DomTree correctness verification ---------------------=== + //~~ + // Check if for every parent with a level L in the tree all of its children // have level L + 1. 
static bool VerifyLevels(const DomTreeT &DT) { @@ -323,20 +784,18 @@ struct SemiNCAInfo { const TreeNodePtr IDom = TN->getIDom(); if (!IDom && TN->getLevel() != 0) { - errs() << "Node without an IDom "; - PrintBlockOrNullptr(errs(), BB); - errs() << " has a nonzero level " << TN->getLevel() << "!\n"; + errs() << "Node without an IDom " << BlockNamePrinter(BB) + << " has a nonzero level " << TN->getLevel() << "!\n"; errs().flush(); return false; } if (IDom && TN->getLevel() != IDom->getLevel() + 1) { - errs() << "Node "; - PrintBlockOrNullptr(errs(), BB); - errs() << " has level " << TN->getLevel() << " while it's IDom "; - PrintBlockOrNullptr(errs(), IDom->getBlock()); - errs() << " has level " << IDom->getLevel() << "!\n"; + errs() << "Node " << BlockNamePrinter(BB) << " has level " + << TN->getLevel() << " while its IDom " + << BlockNamePrinter(IDom->getBlock()) << " has level " + << IDom->getLevel() << "!\n"; errs().flush(); return false; @@ -363,18 +822,14 @@ struct SemiNCAInfo { assert(ToTN); const NodePtr NCD = DT.findNearestCommonDominator(From, To); - const TreeNodePtr NCDTN = NCD ? DT.getNode(NCD) : nullptr; + const TreeNodePtr NCDTN = DT.getNode(NCD); const TreeNodePtr ToIDom = ToTN->getIDom(); if (NCDTN != ToTN && NCDTN != ToIDom) { - errs() << "NearestCommonDominator verification failed:\n\tNCD(From:"; - PrintBlockOrNullptr(errs(), From); - errs() << ", To:"; - PrintBlockOrNullptr(errs(), To); - errs() << ") = "; - PrintBlockOrNullptr(errs(), NCD); - errs() << ",\t (should be To or IDom[To]: "; - PrintBlockOrNullptr(errs(), ToIDom ? 
ToIDom->getBlock() : nullptr); - errs() << ")\n"; + errs() << "NearestCommonDominator verification failed:\n\tNCD(From:" + << BlockNamePrinter(From) << ", To:" << BlockNamePrinter(To) + << ") = " << BlockNamePrinter(NCD) + << ",\t (should be To or IDom[To]: " << BlockNamePrinter(ToIDom) + << ")\n"; errs().flush(); return false; @@ -440,11 +895,9 @@ struct SemiNCAInfo { for (TreeNodePtr Child : TN->getChildren()) if (NodeToInfo.count(Child->getBlock()) != 0) { - errs() << "Child "; - PrintBlockOrNullptr(errs(), Child->getBlock()); - errs() << " reachable after its parent "; - PrintBlockOrNullptr(errs(), BB); - errs() << " is removed!\n"; + errs() << "Child " << BlockNamePrinter(Child) + << " reachable after its parent " << BlockNamePrinter(BB) + << " is removed!\n"; errs().flush(); return false; @@ -477,11 +930,9 @@ struct SemiNCAInfo { if (S == N) continue; if (NodeToInfo.count(S->getBlock()) == 0) { - errs() << "Node "; - PrintBlockOrNullptr(errs(), S->getBlock()); - errs() << " not reachable when its sibling "; - PrintBlockOrNullptr(errs(), N->getBlock()); - errs() << " is removed!\n"; + errs() << "Node " << BlockNamePrinter(S) + << " not reachable when its sibling " << BlockNamePrinter(N) + << " is removed!\n"; errs().flush(); return false; @@ -494,23 +945,30 @@ struct SemiNCAInfo { } }; -template -void Calculate(DominatorTreeBaseByGraphTraits> &DT, - FuncT &F) { - using NodePtr = typename GraphTraits::NodeRef; - static_assert(std::is_pointer::value, - "NodePtr should be a pointer type"); - SemiNCAInfo::type> SNCA; - SNCA.template runSemiNCA(DT, GraphTraits::size(&F)); + +template +void Calculate(DomTreeT &DT, FuncT &F) { + SemiNCAInfo SNCA; + SNCA.calculateFromScratch(DT, GraphTraits::size(&F)); } -template -bool Verify(const DominatorTreeBaseByGraphTraits> &DT) { - using NodePtr = typename GraphTraits::NodeRef; - static_assert(std::is_pointer::value, - "NodePtr should be a pointer type"); - SemiNCAInfo::type> SNCA; +template +void InsertEdge(DomTreeT &DT, 
typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To) { + if (DT.isPostDominator()) std::swap(From, To); + SemiNCAInfo::InsertEdge(DT, From, To); +} +template +void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, + typename DomTreeT::NodePtr To) { + if (DT.isPostDominator()) std::swap(From, To); + SemiNCAInfo::DeleteEdge(DT, From, To); +} + +template +bool Verify(const DomTreeT &DT) { + SemiNCAInfo SNCA; return SNCA.verifyReachability(DT) && SNCA.VerifyLevels(DT) && SNCA.verifyNCD(DT) && SNCA.verifyParentProperty(DT) && SNCA.verifySiblingProperty(DT); @@ -519,4 +977,6 @@ bool Verify(const DominatorTreeBaseByGraphTraits> &DT) { } // namespace DomTreeBuilder } // namespace llvm +#undef DEBUG_TYPE + #endif diff --git a/contrib/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm/include/llvm/Support/TargetParser.h index 72c28865ac57..e13582f6a6d3 100644 --- a/contrib/llvm/include/llvm/Support/TargetParser.h +++ b/contrib/llvm/include/llvm/Support/TargetParser.h @@ -85,6 +85,7 @@ enum ArchExtKind : unsigned { AEK_DSP = 0x400, AEK_FP16 = 0x800, AEK_RAS = 0x1000, + AEK_SVE = 0x2000, // Unsupported extensions. 
AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, @@ -166,7 +167,8 @@ enum ArchExtKind : unsigned { AEK_FP16 = 0x20, AEK_PROFILE = 0x40, AEK_RAS = 0x80, - AEK_LSE = 0x100 + AEK_LSE = 0x100, + AEK_SVE = 0x200 }; StringRef getCanonicalArchName(StringRef Arch); diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h index 15b3b11db045..71fdf47f1979 100644 --- a/contrib/llvm/include/llvm/Support/YAMLTraits.h +++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h @@ -1114,6 +1114,10 @@ class Input : public IO { void *Ctxt = nullptr, SourceMgr::DiagHandlerTy DiagHandler = nullptr, void *DiagHandlerCtxt = nullptr); + Input(MemoryBufferRef Input, + void *Ctxt = nullptr, + SourceMgr::DiagHandlerTy DiagHandler = nullptr, + void *DiagHandlerCtxt = nullptr); ~Input() override; // Check if there was an syntax or semantic error during parsing. diff --git a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 178b08d7b8b7..50de41fd1320 100644 --- a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -58,6 +58,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h index 60a03bdc182d..23711d636c9a 100644 --- a/contrib/llvm/include/llvm/Target/TargetLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetLowering.h @@ -2012,6 +2012,35 @@ class TargetLoweringBase { return isExtFreeImpl(I); } + /// Return true if \p Load and \p Ext can form an ExtLoad. 
+ /// For example, in AArch64 + /// %L = load i8, i8* %ptr + /// %E = zext i8 %L to i32 + /// can be lowered into one load instruction + /// ldrb w0, [x0] + bool isExtLoad(const LoadInst *Load, const Instruction *Ext, + const DataLayout &DL) const { + EVT VT = getValueType(DL, Ext->getType()); + EVT LoadVT = getValueType(DL, Load->getType()); + + // If the load has other users and the truncate is not free, the ext + // probably isn't free. + if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && + !isTruncateFree(Ext->getType(), Load->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa(Ext)) + LType = ISD::ZEXTLOAD; + else { + assert(isa(Ext) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + + return isLoadExtLegal(LType, VT, LoadVT); + } + /// Return true if any actual instruction that defines a value of type FromTy /// implicitly zero-extends the value to ToTy in the result register. /// diff --git a/contrib/llvm/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h b/contrib/llvm/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h new file mode 100644 index 000000000000..964b0f7620a2 --- /dev/null +++ b/contrib/llvm/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h @@ -0,0 +1,24 @@ +//===- DlltoolDriver.h - dlltool.exe-compatible driver ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a dlltool.exe-compatible driver. +// Used by llvm-dlltool. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H +#define LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H + +namespace llvm { +template class ArrayRef; + +int dlltoolDriverMain(ArrayRef ArgsArr); +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp index 3ddefc6520a7..74b5d79ebac5 100644 --- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -433,7 +433,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( if (Visited.insert(C).second) Worklist.push_back(C); - LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { + auto VisitRef = [&](Function &Referee) { Node &RefereeN = *G.lookup(Referee); Edge *E = N->lookup(RefereeN); // FIXME: Similarly to new calls, we also currently preclude @@ -444,7 +444,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( RetainedEdges.insert(&RefereeN); if (E->isCall()) DemotedCallTargets.insert(&RefereeN); - }); + }; + LazyCallGraph::visitReferences(Worklist, Visited, VisitRef); + + // Include synthetic reference edges to known, defined lib functions. + for (auto *F : G.getLibFunctions()) + VisitRef(*F); // First remove all of the edges that are no longer present in this function. 
// We have to build a list of dead targets first and then remove them as the diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index 5b6e2d0476e4..c08c6cfe0c3b 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -14,7 +14,8 @@ using namespace llvm; namespace llvm { -template class DominanceFrontierBase; +template class DominanceFrontierBase; +template class DominanceFrontierBase; template class ForwardDominanceFrontierBase; } diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp index 27c6b580e7ac..95ab6ee3db5b 100644 --- a/contrib/llvm/lib/Analysis/InstCount.cpp +++ b/contrib/llvm/lib/Analysis/InstCount.cpp @@ -26,7 +26,6 @@ using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); STATISTIC(TotalFuncs , "Number of non-external functions"); -STATISTIC(TotalMemInst, "Number of memory instructions"); #define HANDLE_INST(N, OPCODE, CLASS) \ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); @@ -75,13 +74,6 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); } // function. 
// bool InstCount::runOnFunction(Function &F) { - unsigned StartMemInsts = - NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst; visit(F); - unsigned EndMemInsts = - NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst; - TotalMemInst += EndMemInsts-StartMemInsts; return false; } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index f6632020b8fc..b4f3b87e1846 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1745,14 +1745,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getNullValue(Op0->getType()); // (A | ?) & A = A - Value *A = nullptr, *B = nullptr; - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_c_Or(m_Specific(Op1), m_Value()))) return Op1; // A & (A | ?) = A - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_c_Or(m_Specific(Op0), m_Value()))) return Op0; // A mask that only clears known zeros of a shifted value is a no-op. @@ -1852,26 +1849,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Constant::getAllOnesValue(Op0->getType()); // (A & ?) | A = A - Value *A = nullptr, *B = nullptr; - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_c_And(m_Specific(Op1), m_Value()))) return Op1; // A | (A & ?) = A - if (match(Op1, m_And(m_Value(A), m_Value(B))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) return Op0; // ~(A & ?) | A = -1 - if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && - (A == Op1 || B == Op1)) + if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value())))) return Constant::getAllOnesValue(Op1->getType()); // A | ~(A & ?) 
= -1 - if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && - (A == Op0 || B == Op0)) + if (match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value())))) return Constant::getAllOnesValue(Op0->getType()); + Value *A, *B; // (A & ~B) | (A ^ B) -> (A ^ B) // (~B & A) | (A ^ B) -> (A ^ B) // (A & ~B) | (B ^ A) -> (B ^ A) diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp index 0e02850df349..3992657417c5 100644 --- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp @@ -17,8 +17,8 @@ #include namespace llvm { -template -void IDFCalculator::calculate( +template +void IDFCalculator::calculate( SmallVectorImpl &PHIBlocks) { // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. @@ -88,6 +88,6 @@ void IDFCalculator::calculate( } } -template class IDFCalculator; -template class IDFCalculator>; +template class IDFCalculator; +template class IDFCalculator, true>; } diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index a4c3e43b4b0c..d287f81985fd 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -106,6 +106,13 @@ LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() { LazyCallGraph::Edge::Ref); }); + // Add implicit reference edges to any defined libcall functions (if we + // haven't found an explicit edge). 
+ for (auto *F : G->LibFunctions) + if (!Visited.count(F)) + addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*F), + LazyCallGraph::Edge::Ref); + return *Edges; } @@ -120,15 +127,34 @@ LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const { } #endif -LazyCallGraph::LazyCallGraph(Module &M) { +static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { + LibFunc LF; + + // Either this is a normal library function or a "vectorizable" function. + return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); +} + +LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); - for (Function &F : M) - if (!F.isDeclaration() && !F.hasLocalLinkage()) { - DEBUG(dbgs() << " Adding '" << F.getName() - << "' to entry set of the graph.\n"); - addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); - } + for (Function &F : M) { + if (F.isDeclaration()) + continue; + // If this function is a known lib function to LLVM then we want to + // synthesize reference edges to it to model the fact that LLVM can turn + // arbitrary code into a library function call. + if (isKnownLibFunction(F, TLI)) + LibFunctions.insert(&F); + + if (F.hasLocalLinkage()) + continue; + + // External linkage defined functions have edges to them from other + // modules. + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); + addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); + } // Now add entry nodes for functions reachable via initializers to globals. 
SmallVector Worklist; @@ -149,7 +175,8 @@ LazyCallGraph::LazyCallGraph(Module &M) { LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)), - SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)) { + SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)), + LibFunctions(std::move(G.LibFunctions)) { updateGraphPtrs(); } @@ -160,6 +187,7 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { SCCBPA = std::move(G.SCCBPA); SCCMap = std::move(G.SCCMap); LeafRefSCCs = std::move(G.LeafRefSCCs); + LibFunctions = std::move(G.LibFunctions); updateGraphPtrs(); return *this; } @@ -1580,6 +1608,11 @@ void LazyCallGraph::removeDeadFunction(Function &F) { assert(F.use_empty() && "This routine should only be called on trivially dead functions!"); + // We shouldn't remove library functions as they are never really dead while + // the call graph is in use -- every function definition refers to them. + assert(!isLibFunction(F) && + "Must not remove lib functions from the call graph!"); + auto NI = NodeMap.find(&F); if (NI == NodeMap.end()) // Not in the graph at all! 
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index baf932432a0a..697b58622bb4 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -609,7 +609,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { return NearLoop; } -LoopInfo::LoopInfo(const DominatorTreeBase &DomTree) { +LoopInfo::LoopInfo(const DomTreeBase &DomTree) { analyze(DomTree); } diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp index 86d0d92799f2..86de474c7aa9 100644 --- a/contrib/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp @@ -39,7 +39,6 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Scalar.h" #include #define DEBUG_TYPE "memoryssa" diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp index 1caf151546d9..811373ac850b 100644 --- a/contrib/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -23,6 +23,8 @@ using namespace llvm; #define DEBUG_TYPE "postdomtree" +template class llvm::DominatorTreeBase; // PostDomTreeBase + //===----------------------------------------------------------------------===// // PostDominatorTree Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 3fb1ab980add..b973203a89b6 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4173,6 +4173,319 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { return None; } +/// Helper function to createAddRecFromPHIWithCasts. 
We have a phi +/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via +/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the +/// way. This function checks if \p Op, an operand of this SCEVAddExpr, +/// follows one of the following patterns: +/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) +/// If the SCEV expression of \p Op conforms with one of the expected patterns +/// we return the type of the truncation operation, and indicate whether the +/// truncated type should be treated as signed/unsigned by setting +/// \p Signed to true/false, respectively. +static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, + bool &Signed, ScalarEvolution &SE) { + + // The case where Op == SymbolicPHI (that is, with no type conversions on + // the way) is handled by the regular add recurrence creating logic and + // would have already been triggered in createAddRecForPHI. Reaching it here + // means that createAddRecFromPHI had failed for this PHI before (e.g., + // because one of the other operands of the SCEVAddExpr updating this PHI is + // not invariant). + // + // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in + // this case predicates that allow us to prove that Op == SymbolicPHI will + // be added. + if (Op == SymbolicPHI) + return nullptr; + + unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType()); + unsigned NewBits = SE.getTypeSizeInBits(Op->getType()); + if (SourceBits != NewBits) + return nullptr; + + const SCEVSignExtendExpr *SExt = dyn_cast(Op); + const SCEVZeroExtendExpr *ZExt = dyn_cast(Op); + if (!SExt && !ZExt) + return nullptr; + const SCEVTruncateExpr *Trunc = + SExt ? dyn_cast(SExt->getOperand()) + : dyn_cast(ZExt->getOperand()); + if (!Trunc) + return nullptr; + const SCEV *X = Trunc->getOperand(); + if (X != SymbolicPHI) + return nullptr; + Signed = SExt ? 
true : false; + return Trunc->getType(); +} + +static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { + if (!PN->getType()->isIntegerTy()) + return nullptr; + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return nullptr; + return L; +} + +// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the +// computation that updates the phi follows the following pattern: +// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum +// which correspond to a phi->trunc->sext/zext->add->phi update chain. +// If so, try to see if it can be rewritten as an AddRecExpr under some +// Predicates. If successful, return them as a pair. Also cache the results +// of the analysis. +// +// Example usage scenario: +// Say the Rewriter is called for the following SCEV: +// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// where: +// %X = phi i64 (%Start, %BEValue) +// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), +// and call this function with %SymbolicPHI = %X. +// +// The analysis will find that the value coming around the backedge has +// the following SCEV: +// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) +// Upon concluding that this matches the desired pattern, the function +// will return the pair {NewAddRec, SmallPredsVec} where: +// NewAddRec = {%Start,+,%Step} +// SmallPredsVec = {P1, P2, P3} as follows: +// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)} Flags: +// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) +// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) +// The returned pair means that SymbolicPHI can be rewritten into NewAddRec +// under the predicates {P1,P2,P3}. 
+// This predicated rewrite will be cached in PredicatedSCEVRewrites: +// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} +// +// TODO's: +// +// 1) Extend the Induction descriptor to also support inductions that involve +// casts: When needed (namely, when we are called in the context of the +// vectorizer induction analysis), a Set of cast instructions will be +// populated by this method, and provided back to isInductionPHI. This is +// needed to allow the vectorizer to properly record them to be ignored by +// the cost model and to avoid vectorizing them (otherwise these casts, +// which are redundant under the runtime overflow checks, will be +// vectorized, which can be costly). +// +// 2) Support additional induction/PHISCEV patterns: We also want to support +// inductions where the sext-trunc / zext-trunc operations (partly) occur +// after the induction update operation (the induction increment): +// +// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) +// which correspond to a phi->add->trunc->sext/zext->phi update chain. +// +// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) +// which correspond to a phi->trunc->add->sext/zext->phi update chain. +// +// 3) Outline common code with createAddRecFromPHI to avoid duplication. +// +Optional>> +ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) { + SmallVector Predicates; + + // *** Part1: Analyze if we have a phi-with-cast pattern for which we can + // return an AddRec expression under some predicate. + + auto *PN = cast(SymbolicPHI->getValue()); + const Loop *L = isIntegerLoopHeaderPHI(PN, LI); + assert (L && "Expecting an integer loop header phi"); + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. 
+ Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (!BEValueV || !StartValueV) + return None; + + const SCEV *BEValue = getSCEV(BEValueV); + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, possibly with casts that we can ignore under + // an appropriate runtime guard, then we found a simple induction variable! + const auto *Add = dyn_cast(BEValue); + if (!Add) + return None; + + // If there is a single occurrence of the symbolic value, possibly + // casted, replace it with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + Type *TruncTy = nullptr; + bool Signed; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if ((TruncTy = + isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex == Add->getNumOperands()) + return None; + + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // The runtime checks will not be valid if the step amount is + // varying inside the loop. 
+ if (!isLoopInvariant(Accum, L)) + return None; + + + // *** Part2: Create the predicates + + // Analysis was successful: we have a phi-with-cast pattern for which we + // can return an AddRec expression under the following predicates: + // + // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) + // fits within the truncated type (does not overflow) for i = 0 to n-1. + // P2: An Equal predicate that guarantees that + // Start = (Ext ix (Trunc iy (Start) to ix) to iy) + // P3: An Equal predicate that guarantees that + // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy) + // + // As we next prove, the above predicates guarantee that: + // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy) + // + // + // More formally, we want to prove that: + // Expr(i+1) = Start + (i+1) * Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // + // Given that: + // 1) Expr(0) = Start + // 2) Expr(1) = Start + Accum + // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2 + // 3) Induction hypothesis (step i): + // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + // + // Proof: + // Expr(i+1) = + // = Start + (i+1)*Accum + // = (Start + i*Accum) + Accum + // = Expr(i) + Accum + // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum + // :: from step i + // + // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum + // + // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + // + (Ext ix (Trunc iy (Accum) to ix) to iy) + // + Accum :: from P3 + // + // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) + // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y) + // + // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum + // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum + // + // By induction, the same applies to all iterations 1<=i(PHISCEV); + + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? 
SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); + Predicates.push_back(AddRecPred); + + // Create the Equal Predicates P2,P3: + auto AppendPredicate = [&](const SCEV *Expr) -> void { + assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); + const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); + const SCEV *ExtendedExpr = + Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType()) + : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + if (Expr != ExtendedExpr && + !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) { + const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); + DEBUG (dbgs() << "Added Predicate: " << *Pred); + Predicates.push_back(Pred); + } + }; + + AppendPredicate(StartVal); + AppendPredicate(Accum); + + // *** Part3: Predicates are ready. Now go ahead and create the new addrec in + // which the casts had been folded away. The caller can rewrite SymbolicPHI + // into NewAR if it will also add the runtime overflow checks specified in + // Predicates. + auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); + + std::pair> PredRewrite = + std::make_pair(NewAR, Predicates); + // Remember the result of the analysis for this SCEV at this locayyytion. + PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite; + return PredRewrite; +} + +Optional>> +ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { + + auto *PN = cast(SymbolicPHI->getValue()); + const Loop *L = isIntegerLoopHeaderPHI(PN, LI); + if (!L) + return None; + + // Check to see if we already analyzed this PHI. 
+ auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); + if (I != PredicatedSCEVRewrites.end()) { + std::pair> Rewrite = + I->second; + // Analysis was done before and failed to create an AddRec: + if (Rewrite.first == SymbolicPHI) + return None; + // Analysis was done before and succeeded to create an AddRec under + // a predicate: + assert(isa(Rewrite.first) && "Expected an AddRec"); + assert(!(Rewrite.second).empty() && "Expected to find Predicates"); + return Rewrite; + } + + Optional>> + Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI); + + // Record in the cache that the analysis failed + if (!Rewrite) { + SmallVector Predicates; + PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; + return None; + } + + return Rewrite; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -5904,6 +6217,16 @@ void ScalarEvolution::forgetLoop(const Loop *L) { RemoveLoopFromBackedgeMap(BackedgeTakenCounts); RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts); + // Drop information about predicated SCEV rewrites for this loop. + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.second == L) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + // Drop information about expressions based on loop-header PHIs. 
SmallVector Worklist; PushLoopPHIs(L, Worklist); @@ -10062,6 +10385,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), + PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; } @@ -10462,6 +10786,15 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { HasRecMap.erase(S); MinTrailingZerosCache.erase(S); + for (auto I = PredicatedSCEVRewrites.begin(); + I != PredicatedSCEVRewrites.end();) { + std::pair Entry = I->first; + if (Entry.first == S) + PredicatedSCEVRewrites.erase(I++); + else + ++I; + } + auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { @@ -10621,10 +10954,11 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive(); } -const SCEVPredicate * -ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, - const SCEVConstant *RHS) { +const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, + const SCEV *RHS) { FoldingSetNodeID ID; + assert(LHS->getType() == RHS->getType() && + "Type mismatch between LHS and RHS"); // Unique this node based on the arguments ID.AddInteger(SCEVPredicate::P_Equal); ID.AddPointer(LHS); @@ -10687,8 +11021,7 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor { if (IPred->getLHS() == Expr) return IPred->getRHS(); } - - return Expr; + return convertToAddRecWithPreds(Expr); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { @@ -10724,17 +11057,41 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor { } private: - bool addOverflowAssumption(const SCEVAddRecExpr *AR, - SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { - auto *A = SE.getWrapPredicate(AR, AddedFlags); + bool addOverflowAssumption(const SCEVPredicate *P) { if (!NewPreds) { // Check if we've already 
made this assumption. - return Pred && Pred->implies(A); + return Pred && Pred->implies(P); } - NewPreds->insert(A); + NewPreds->insert(P); return true; } + bool addOverflowAssumption(const SCEVAddRecExpr *AR, + SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { + auto *A = SE.getWrapPredicate(AR, AddedFlags); + return addOverflowAssumption(A); + } + + // If \p Expr represents a PHINode, we try to see if it can be represented + // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible + // to add this predicate as a runtime overflow check, we return the AddRec. + // If \p Expr does not meet these conditions (is not a PHI node, or we + // couldn't create an AddRec for it, or couldn't add the predicate), we just + // return \p Expr. + const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { + if (!isa(Expr->getValue())) + return Expr; + Optional>> + PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); + if (!PredicatedRewrite) + return Expr; + for (auto *P : PredicatedRewrite->second){ + if (!addOverflowAssumption(P)) + return Expr; + } + return PredicatedRewrite->first; + } + SmallPtrSetImpl *NewPreds; SCEVUnionPredicate *Pred; const Loop *L; @@ -10771,9 +11128,11 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, : FastID(ID), Kind(Kind) {} SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, - const SCEVUnknown *LHS, - const SCEVConstant *RHS) - : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {} + const SCEV *LHS, const SCEV *RHS) + : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { + assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); + assert(LHS != RHS && "LHS and RHS are the same SCEV"); +} bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { const auto *Op = dyn_cast(N); diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 94bbc58541a7..25813c65037f 100644 --- 
a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -82,6 +82,11 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); } +int TargetTransformInfo::getExtCost(const Instruction *I, + const Value *Src) const { + return TTIImpl->getExtCost(I, Src); +} + int TargetTransformInfo::getIntrinsicCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index 428bb21fbf51..90e0d6a216ee 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -588,7 +588,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(spir_func); KEYWORD(intel_ocl_bicc); KEYWORD(x86_64_sysvcc); - KEYWORD(x86_64_win64cc); + KEYWORD(win64cc); KEYWORD(x86_regcallcc); KEYWORD(webkit_jscc); KEYWORD(swiftcc); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index 717eb0e00f4f..13679ce1d25c 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -1670,7 +1670,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'spir_func' /// ::= 'spir_kernel' /// ::= 'x86_64_sysvcc' -/// ::= 'x86_64_win64cc' +/// ::= 'win64cc' /// ::= 'webkit_jscc' /// ::= 'anyregcc' /// ::= 'preserve_mostcc' @@ -1712,7 +1712,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break; case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; - case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; + case lltok::kw_win64cc: CC = CallingConv::Win64; break; case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; case lltok::kw_anyregcc: 
CC = CallingConv::AnyReg; break; case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; @@ -4411,13 +4411,15 @@ bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) { REQUIRED(tag, DwarfTagField, ); \ REQUIRED(scope, MDField, ); \ OPTIONAL(entity, MDField, ); \ + OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(name, MDStringField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT(DIImportedEntity, (Context, tag.Val, scope.Val, - entity.Val, line.Val, name.Val)); + Result = GET_OR_DISTINCT( + DIImportedEntity, + (Context, tag.Val, scope.Val, entity.Val, file.Val, line.Val, name.Val)); return false; } diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index 9c7a06de81b4..0f3707ba0d1e 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -141,7 +141,7 @@ enum Kind { kw_spir_kernel, kw_spir_func, kw_x86_64_sysvcc, - kw_x86_64_win64cc, + kw_win64cc, kw_webkit_jscc, kw_anyregcc, kw_swiftcc, diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index b1504a8034e0..10fbcdea784f 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1671,15 +1671,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_IMPORTED_ENTITY: { - if (Record.size() != 6) + if (Record.size() != 6 && Record.size() != 7) return error("Invalid record"); IsDistinct = Record[0]; + bool HasFile = (Record.size() == 7); MetadataList.assignValue( GET_OR_DISTINCT(DIImportedEntity, (Context, Record[1], getMDOrNull(Record[2]), - getDITypeRefOrNull(Record[3]), Record[4], - getMDString(Record[5]))), + getDITypeRefOrNull(Record[3]), + HasFile ? getMDOrNull(Record[6]) : nullptr, + HasFile ? 
Record[4] : 0, getMDString(Record[5]))), NextMetadataNo); NextMetadataNo++; break; diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 0e518d2bbc8f..dcffde1742cd 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1718,6 +1718,7 @@ void ModuleBitcodeWriter::writeDIImportedEntity( Record.push_back(VE.getMetadataOrNullID(N->getEntity())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(VE.getMetadataOrNullID(N->getRawFile())); Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev); Record.clear(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d4a90eeabe15..676c48fe5c67 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -664,8 +664,9 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), - Module->getScope()->getDirectory()); + auto *File = Module->getFile(); + addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "", + File ? 
File->getDirectory() : ""); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); StringRef Name = Module->getName(); if (!Name.empty()) diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index b7155ac2480a..45dc13d58de7 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4267,9 +4267,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. - Value *Consensus = nullptr; - unsigned NumUsesConsensus = 0; - bool IsNumUsesConsensusValid = false; + bool AddrModeFound = false; bool PhiSeen = false; SmallVector AddrModeInsts; ExtAddrMode AddrMode; @@ -4280,11 +4278,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = worklist.back(); worklist.pop_back(); - // Break use-def graph loops. - if (!Visited.insert(V).second) { - Consensus = nullptr; - break; - } + // We allow traversing cyclic Phi nodes. + // In case of success after this loop we ensure that traversing through + // Phi nodes ends up with all cases to compute address of the form + // BaseGV + Base + Scale * Index + Offset + // where Scale and Offset are constans and BaseGV, Base and Index + // are exactly the same Values in all cases. + // It means that BaseGV, Scale and Offset dominate our memory instruction + // and have the same value as they had in address computation represented + // as Phi. So we can safely sink address computation to memory instruction. + if (!Visited.insert(V).second) + continue; // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast(V)) { @@ -4297,47 +4301,26 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For non-PHIs, determine the addressing mode being computed. 
Note that // the result may differ depending on what other uses our candidate // addressing instructions might have. - SmallVector NewAddrModeInsts; + AddrModeInsts.clear(); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT); + V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, + InsertedInsts, PromotedInsts, TPT); - // This check is broken into two cases with very similar code to avoid using - // getNumUses() as much as possible. Some values have a lot of uses, so - // calling getNumUses() unconditionally caused a significant compile-time - // regression. - if (!Consensus) { - Consensus = V; + if (!AddrModeFound) { + AddrModeFound = true; AddrMode = NewAddrMode; - AddrModeInsts = NewAddrModeInsts; - continue; - } else if (NewAddrMode == AddrMode) { - if (!IsNumUsesConsensusValid) { - NumUsesConsensus = Consensus->getNumUses(); - IsNumUsesConsensusValid = true; - } - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher - // applicable. - unsigned NumUses = V->getNumUses(); - if (NumUses > NumUsesConsensus) { - Consensus = V; - NumUsesConsensus = NumUses; - AddrModeInsts = NewAddrModeInsts; - } continue; } + if (NewAddrMode == AddrMode) + continue; - Consensus = nullptr; + AddrModeFound = false; break; } // If the addressing mode couldn't be determined, or if multiple different // ones were determined, bail out now. 
- if (!Consensus) { + if (!AddrModeFound) { TPT.rollback(LastKnownGood); return false; } @@ -4847,25 +4830,7 @@ bool CodeGenPrepare::canFormExtLd( if (!HasPromoted && LI->getParent() == Inst->getParent()) return false; - EVT VT = TLI->getValueType(*DL, Inst->getType()); - EVT LoadVT = TLI->getValueType(*DL, LI->getType()); - - // If the load has other users and the truncate is not free, this probably - // isn't worthwhile. - if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && - !TLI->isTruncateFree(Inst->getType(), LI->getType())) - return false; - - // Check whether the target supports casts folded into loads. - unsigned LType; - if (isa(Inst)) - LType = ISD::ZEXTLOAD; - else { - assert(isa(Inst) && "Unexpected ext type!"); - LType = ISD::SEXTLOAD; - } - - return TLI->isLoadExtLegal(LType, VT, LoadVT); + return TLI->isExtLoad(LI, Inst, *DL); } /// Move a zext or sext fed by a load into the same basic block as the load, diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 49fb5e8f075b..5258370e6680 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -433,9 +433,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: { unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV || + MI.getOpcode() == TargetOpcode::G_SREM || MI.getOpcode() == TargetOpcode::G_ASHR ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index c176de16b593..e6f80dbb8630 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -11,8 +11,6 @@ // instructions do not lengthen the critical path or the resource depth. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-combiner" - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -32,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "machine-combiner" + STATISTIC(NumInstCombined, "Number of machineinst combined"); namespace { diff --git a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 28ecc8f96805..b559e4e513a6 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -15,7 +15,8 @@ using namespace llvm; namespace llvm { -template class DominanceFrontierBase; +template class DominanceFrontierBase; +template class DominanceFrontierBase; template class ForwardDominanceFrontierBase; } diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp index 65e9e5d195a4..845e8232477c 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -31,7 +31,7 @@ static cl::opt VerifyMachineDomInfoX( namespace llvm { template class DomTreeNodeBase; -template class DominatorTreeBase; +template class DominatorTreeBase; // DomTreeBase } char MachineDominatorTree::ID = 0; @@ -49,7 +49,7 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); - DT.reset(new DominatorTreeBase(false)); 
+ DT.reset(new DomTreeBase()); DT->recalculate(F); return false; } @@ -144,7 +144,7 @@ void MachineDominatorTree::verifyDomTree() const { return; MachineFunction &F = *getRoot()->getParent(); - DominatorTreeBase OtherDT(false); + DomTreeBase OtherDT; OtherDT.recalculate(F); if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || DT->compare(OtherDT)) { diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp index c3f6e9249e7d..488377998cb3 100644 --- a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -16,6 +16,10 @@ using namespace llvm; +namespace llvm { +template class DominatorTreeBase; // PostDomTreeBase +} + char MachinePostDominatorTree::ID = 0; //declare initializeMachinePostDominatorTreePass @@ -24,8 +28,7 @@ INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new DominatorTreeBase(true); //true indicate - // postdominator + DT = new PostDomTreeBase(); } FunctionPass * diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 71382c18fdf9..d5d3f7a61a9f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3889,9 +3889,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Note: the SimplifyDemandedBits fold below can make an information-losing // transform, and then we have no way to find this better fold. 
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { - ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); - SDValue SubRHS = N0.getOperand(1); - if (SubLHS && SubLHS->isNullValue()) { + if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) { + SDValue SubRHS = N0.getOperand(1); if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return SubRHS; @@ -4586,6 +4585,20 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return nullptr; } +// if Left + Right == Sum (constant or constant splat vector) +static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum, + SelectionDAG &DAG, const SDLoc &DL) { + EVT ShiftVT = Left.getValueType(); + if (ShiftVT != Right.getValueType()) return false; + + SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT, + Left.getNode(), Right.getNode()); + if (!ShiftSum) return false; + + ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum); + return CSum && CSum->getZExtValue() == Sum; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -4631,30 +4644,24 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { - uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != EltSizeInBits) - return nullptr; - + if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. 
if (LHSMask.getNode() || RHSMask.getNode()) { - SDValue Mask = DAG.getAllOnesConstant(DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue Mask = AllOnes; if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, - DAG.getNode(ISD::OR, DL, VT, LHSMask, - DAG.getConstant(RHSBits, DL, VT))); + DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, - DAG.getNode(ISD::OR, DL, VT, RHSMask, - DAG.getConstant(LHSBits, DL, VT))); + DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); } Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); @@ -5272,11 +5279,21 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + unsigned Bitsize = VT.getScalarSizeInBits(); // fold (rot x, 0) -> x if (isNullConstantOrNullSplatConstant(N1)) return N0; + // fold (rot x, c) -> (rot x, c % BitSize) + if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { + if (Cst->getAPIntValue().uge(Bitsize)) { + uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); + return DAG.getNode(N->getOpcode(), dl, VT, N0, + DAG.getConstant(RotAmt, dl, N1.getValueType())); + } + } + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -5286,22 +5303,24 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { unsigned NextOp = N0.getOpcode(); // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) - if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) - if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) - if (SDNode *C2 = - DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - bool SameSide = (N->getOpcode() == NextOp); - unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; - if (SDValue CombinedShift = - DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) { - unsigned Bitsize = VT.getScalarSizeInBits(); - SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT); - SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( - ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode()); - return DAG.getNode( - N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); - } + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { + SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); + SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); + if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { + EVT ShiftVT = C1->getValueType(0); + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? 
ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), + BitsizeC.getNode()); + return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), + CombinedShiftNorm); } + } + } return SDValue(); } @@ -7152,8 +7171,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); } } @@ -7210,8 +7235,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - return CombineTo(N, And); // Return N so it doesn't get rechecked! + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, And); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N,0); // Return N so it doesn't get rechecked! 
} } } @@ -7451,8 +7481,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -7503,8 +7539,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - return CombineTo(N, And); // Return N so it doesn't get rechecked! + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + CombineTo(N, And); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N,0); // Return N so it doesn't get rechecked! } } } @@ -7676,13 +7717,18 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + // If the load value is used only by N, replace it via CombineTo N. 
+ bool NoReplaceTrunc = N0.hasOneUse(); + CombineTo(N, ExtLoad); + if (NoReplaceTrunc) + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + else + CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -11373,12 +11419,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Replace uses with load result and token factor. Don't add users - // to work list. - return CombineTo(N, ReplLoad.getValue(0), Token, false); + // Replace uses with load result and token factor + return CombineTo(N, ReplLoad.getValue(0), Token); } } @@ -12744,7 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && !NoVectors) { // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); + unsigned Elts = i + 1; + if (MemVT.isVector()) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, @@ -13003,7 +13050,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); AddToWorklist(NewStoreChain.getNode()); - MachineMemOperand::Flags MMOFlags = isDereferenceable ? + MachineMemOperand::Flags MMOFlags = isDereferenceable ? 
MachineMemOperand::MODereferenceable: MachineMemOperand::MONone; @@ -16703,6 +16750,20 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + if (auto *A = dyn_cast(BasePtr0.getBase())) + if (auto *B = dyn_cast(BasePtr1.getBase())) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + // If the base are the same frame index but the we couldn't find a + // constant offset, (indices are different) be conservative. + if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || + !MFI.isFixedObjectIndex(B->getIndex()))) + return false; + } + // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis // modified to use BaseIndexOffset. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index ac3247948169..75fec7bd1d48 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1827,10 +1827,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1863,6 +1864,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1877,9 +1885,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp index 1a8d5a4f45da..0b4c6e551667 100644 --- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -142,9 +142,9 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // Invalid value for threshold. 
// Count the number of MachineInstr`s in MachineFunction - int64_t MICount = 0; - for (const auto& MBB : MF) - MICount += MBB.size(); + int64_t MICount = 0; + for (const auto& MBB : MF) + MICount += MBB.size(); // Check if we have a loop. // FIXME: Maybe make this smarter, and see whether the loops are dependent diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index 22f166a2335d..79b9fdefd40e 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" @@ -42,13 +41,6 @@ static Error visitKnownMember(CVMemberRecord &Record, return Error::success(); } -static Expected deserializeTypeServerRecord(CVType &Record) { - TypeServer2Record R(TypeRecordKind::TypeServer2); - if (auto EC = TypeDeserializer::deserializeAs(Record, R)) - return std::move(EC); - return R; -} - static Error visitMemberRecord(CVMemberRecord &Record, TypeVisitorCallbacks &Callbacks) { if (auto EC = Callbacks.visitMemberBegin(Record)) @@ -84,8 +76,6 @@ class CVTypeVisitor { public: explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks); - void addTypeServerHandler(TypeServerHandler &Handler); - Error visitTypeRecord(CVType &Record, TypeIndex Index); Error visitTypeRecord(CVType &Record); @@ -98,45 +88,15 @@ class CVTypeVisitor { Error visitFieldListMemberStream(BinaryStreamReader &Stream); private: - Expected handleTypeServer(CVType &Record); Error finishVisitation(CVType &Record); /// The interface to the class that gets notified of each visitation. 
TypeVisitorCallbacks &Callbacks; - - TinyPtrVector Handlers; }; CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks) : Callbacks(Callbacks) {} -void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) { - Handlers.push_back(&Handler); -} - -Expected CVTypeVisitor::handleTypeServer(CVType &Record) { - if (Record.Type == TypeLeafKind::LF_TYPESERVER2 && !Handlers.empty()) { - auto TS = deserializeTypeServerRecord(Record); - if (!TS) - return TS.takeError(); - - for (auto Handler : Handlers) { - auto ExpectedResult = Handler->handle(*TS, Callbacks); - // If there was an error, return the error. - if (!ExpectedResult) - return ExpectedResult.takeError(); - - // If the handler processed the record, return success. - if (*ExpectedResult) - return true; - - // Otherwise keep searching for a handler, eventually falling out and - // using the default record handler. - } - } - return false; -} - Error CVTypeVisitor::finishVisitation(CVType &Record) { switch (Record.Type) { default: @@ -163,12 +123,6 @@ Error CVTypeVisitor::finishVisitation(CVType &Record) { } Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) { - auto ExpectedResult = handleTypeServer(Record); - if (!ExpectedResult) - return ExpectedResult.takeError(); - if (*ExpectedResult) - return Error::success(); - if (auto EC = Callbacks.visitTypeBegin(Record, Index)) return EC; @@ -176,12 +130,6 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) { } Error CVTypeVisitor::visitTypeRecord(CVType &Record) { - auto ExpectedResult = handleTypeServer(Record); - if (!ExpectedResult) - return ExpectedResult.takeError(); - if (*ExpectedResult) - return Error::success(); - if (auto EC = Callbacks.visitTypeBegin(Record)) return EC; @@ -271,52 +219,37 @@ struct VisitHelper { Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source, - TypeServerHandler *TS) { + VisitorDataSource Source) { 
VisitHelper V(Callbacks, Source); - if (TS) - V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeRecord(Record, Index); } Error llvm::codeview::visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source, - TypeServerHandler *TS) { + VisitorDataSource Source) { VisitHelper V(Callbacks, Source); - if (TS) - V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeRecord(Record); } Error llvm::codeview::visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, - VisitorDataSource Source, - TypeServerHandler *TS) { + VisitorDataSource Source) { VisitHelper V(Callbacks, Source); - if (TS) - V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeStream(Types); } Error llvm::codeview::visitTypeStream(CVTypeRange Types, - TypeVisitorCallbacks &Callbacks, - TypeServerHandler *TS) { + TypeVisitorCallbacks &Callbacks) { VisitHelper V(Callbacks, VDS_BytesPresent); - if (TS) - V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeStream(Types); } Error llvm::codeview::visitTypeStream(TypeCollection &Types, - TypeVisitorCallbacks &Callbacks, - TypeServerHandler *TS) { + TypeVisitorCallbacks &Callbacks) { // When the internal visitor calls Types.getType(Index) the interface is // required to return a CVType with the bytes filled out. So we can assume // that the bytes will be present when individual records are visited. 
VisitHelper V(Callbacks, VDS_BytesPresent); - if (TS) - V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeStream(Types); } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 711144fc2faa..4fc14480578e 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -168,18 +168,19 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) { return Error::success(); } -Error CodeViewRecordIO::mapGuid(StringRef &Guid) { +Error CodeViewRecordIO::mapGuid(GUID &Guid) { constexpr uint32_t GuidSize = 16; if (maxFieldLength() < GuidSize) return make_error(cv_error_code::insufficient_buffer); if (isWriting()) { - assert(Guid.size() == 16 && "Invalid Guid Size!"); - if (auto EC = Writer->writeFixedString(Guid)) + if (auto EC = Writer->writeBytes(Guid.Guid)) return EC; } else { - if (auto EC = Reader->readFixedString(Guid, 16)) + ArrayRef GuidBytes; + if (auto EC = Reader->readBytes(GuidBytes, GuidSize)) return EC; + memcpy(Guid.Guid, GuidBytes.data(), GuidSize); } return Error::success(); } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/Formatters.cpp b/contrib/llvm/lib/DebugInfo/CodeView/Formatters.cpp index 1fa8d219d6ac..b8d89c76da3b 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/Formatters.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/Formatters.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/Formatters.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/GUID.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -39,3 +40,9 @@ void GuidAdapter::format(raw_ostream &Stream, StringRef Style) { } Stream << "}"; } + +raw_ostream &llvm::codeview::operator<<(raw_ostream &OS, const GUID &Guid) { + codeview::detail::GuidAdapter A(Guid.Guid); + A.format(OS, ""); + return OS; +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp 
b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp index c2c02f8de03f..62e73acc72d6 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -186,7 +186,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BuildInfoSym &BuildInfo) { - W.printNumber("BuildId", BuildInfo.BuildId); + printTypeIndex("BuildId", BuildInfo.BuildId); return Error::success(); } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index 589966705015..e18a35ca1f38 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp @@ -354,7 +354,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { } Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { - W->printString("Guid", formatv("{0}", fmt_guid(TS.getGuid())).str()); + W->printString("Guid", formatv("{0}", TS.getGuid()).str()); W->printNumber("Age", TS.getAge()); W->printString("Name", TS.getName()); return Error::success(); diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index 71a0966df036..bff3516203a0 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -10,13 +10,11 @@ #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include 
"llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" #include "llvm/Support/ScopedPrinter.h" @@ -57,56 +55,35 @@ namespace { /// streams: an item (or IPI) stream and a type stream, as this is what is /// actually stored in the final PDB. We choose which records go where by /// looking at the record kind. -class TypeStreamMerger : public TypeVisitorCallbacks { +class TypeStreamMerger { public: - explicit TypeStreamMerger(SmallVectorImpl &SourceToDest, - TypeServerHandler *Handler) - : Handler(Handler), IndexMap(SourceToDest) { + explicit TypeStreamMerger(SmallVectorImpl &SourceToDest) + : IndexMap(SourceToDest) { SourceToDest.clear(); } static const TypeIndex Untranslated; - Error visitTypeBegin(CVType &Record) override; - Error visitTypeEnd(CVType &Record) override; - Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, - const CVTypeArray &IdsAndTypes); + const CVTypeArray &IdsAndTypes); Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef TypeSourceToDest, - const CVTypeArray &Ids); + const CVTypeArray &Ids); Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types); private: Error doit(const CVTypeArray &Types); + Error remapAllTypes(const CVTypeArray &Types); + + Error remapType(const CVType &Type); + void addMapping(TypeIndex Idx); bool remapTypeIndex(TypeIndex &Idx); bool remapItemIndex(TypeIndex &Idx); - bool remapIndices(RemappedType &Record, ArrayRef Refs) { - auto OriginalData = Record.OriginalRecord.content(); - bool Success = true; - for (auto &Ref : Refs) { - uint32_t Offset = Ref.Offset; - ArrayRef Bytes = - OriginalData.slice(Ref.Offset, sizeof(TypeIndex)); - ArrayRef TIs(reinterpret_cast(Bytes.data()), - Ref.Count); - for (auto TI : TIs) { - TypeIndex NewTI = TI; - bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef) - ? 
remapItemIndex(NewTI) - : remapTypeIndex(NewTI); - if (ThisSuccess && NewTI != TI) - Record.Mappings.emplace_back(Offset, NewTI); - Offset += sizeof(TypeIndex); - Success &= ThisSuccess; - } - } - return Success; - } + bool remapIndices(RemappedType &Record, ArrayRef Refs); bool remapIndex(TypeIndex &Idx, ArrayRef Map); @@ -128,21 +105,6 @@ class TypeStreamMerger : public TypeVisitorCallbacks { return Error::success(); } - Error writeTypeRecord(const CVType &Record) { - TypeIndex DestIdx = - DestTypeStream->writeSerializedRecord(Record.RecordData); - addMapping(DestIdx); - return Error::success(); - } - - Error writeTypeRecord(const RemappedType &Record, bool RemapSuccess) { - return writeRecord(*DestTypeStream, Record, RemapSuccess); - } - - Error writeIdRecord(const RemappedType &Record, bool RemapSuccess) { - return writeRecord(*DestIdStream, Record, RemapSuccess); - } - Optional LastError; bool IsSecondPass = false; @@ -153,7 +115,6 @@ class TypeStreamMerger : public TypeVisitorCallbacks { TypeTableBuilder *DestIdStream = nullptr; TypeTableBuilder *DestTypeStream = nullptr; - TypeServerHandler *Handler = nullptr; // If we're only mapping id records, this array contains the mapping for // type records. 
@@ -168,12 +129,8 @@ class TypeStreamMerger : public TypeVisitorCallbacks { const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); -Error TypeStreamMerger::visitTypeBegin(CVType &Rec) { - RemappedType R(Rec); - SmallVector Refs; - discoverTypeIndices(Rec.RecordData, Refs); - bool Success = remapIndices(R, Refs); - switch (Rec.kind()) { +static bool isIdRecord(TypeLeafKind K) { + switch (K) { case TypeLeafKind::LF_FUNC_ID: case TypeLeafKind::LF_MFUNC_ID: case TypeLeafKind::LF_STRING_ID: @@ -181,19 +138,10 @@ Error TypeStreamMerger::visitTypeBegin(CVType &Rec) { case TypeLeafKind::LF_BUILDINFO: case TypeLeafKind::LF_UDT_SRC_LINE: case TypeLeafKind::LF_UDT_MOD_SRC_LINE: - return writeIdRecord(R, Success); + return true; default: - return writeTypeRecord(R, Success); + return false; } - return Error::success(); -} - -Error TypeStreamMerger::visitTypeEnd(CVType &Rec) { - ++CurIndex; - if (!IsSecondPass) - assert(IndexMap.size() == slotForIndex(CurIndex) && - "visitKnownRecord should add one index map entry"); - return Error::success(); } void TypeStreamMerger::addMapping(TypeIndex Idx) { @@ -256,7 +204,7 @@ bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) { } Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, - const CVTypeArray &Types) { + const CVTypeArray &Types) { DestTypeStream = &Dest; return doit(Types); @@ -264,7 +212,7 @@ Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, ArrayRef TypeSourceToDest, - const CVTypeArray &Ids) { + const CVTypeArray &Ids) { DestIdStream = &Dest; TypeLookup = TypeSourceToDest; @@ -273,25 +221,14 @@ Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, - const CVTypeArray &IdsAndTypes) { + const CVTypeArray &IdsAndTypes) { DestIdStream = &DestIds; DestTypeStream = &DestTypes; - return doit(IdsAndTypes); } Error 
TypeStreamMerger::doit(const CVTypeArray &Types) { - LastError = Error::success(); - - // We don't want to deserialize records. I guess this flag is poorly named, - // but it really means "Don't deserialize records before switching on the - // concrete type. - // FIXME: We can probably get even more speed here if we don't use the visitor - // pipeline here, but instead write the switch ourselves. I don't think it - // would buy us much since it's already pretty fast, but it's probably worth - // a few cycles. - if (auto EC = - codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) + if (auto EC = remapAllTypes(Types)) return EC; // If we found bad indices but no other errors, try doing another pass and see @@ -301,50 +238,92 @@ Error TypeStreamMerger::doit(const CVTypeArray &Types) { // topologically sorted. The standard library contains MASM-produced objects, // so this is important to handle correctly, but we don't have to be too // efficient. MASM type streams are usually very small. 
- while (!*LastError && NumBadIndices > 0) { + while (!LastError && NumBadIndices > 0) { unsigned BadIndicesRemaining = NumBadIndices; IsSecondPass = true; NumBadIndices = 0; CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); - if (auto EC = - codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) + if (auto EC = remapAllTypes(Types)) return EC; assert(NumBadIndices <= BadIndicesRemaining && "second pass found more bad indices"); - if (!*LastError && NumBadIndices == BadIndicesRemaining) { + if (!LastError && NumBadIndices == BadIndicesRemaining) { return llvm::make_error( cv_error_code::corrupt_record, "input type graph contains cycles"); } } - Error Ret = std::move(*LastError); - LastError.reset(); - return Ret; + if (LastError) + return std::move(*LastError); + return Error::success(); +} + +Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { + for (const CVType &Type : Types) + if (auto EC = remapType(Type)) + return EC; + return Error::success(); +} + +Error TypeStreamMerger::remapType(const CVType &Type) { + RemappedType R(Type); + SmallVector Refs; + discoverTypeIndices(Type.RecordData, Refs); + bool MappedAllIndices = remapIndices(R, Refs); + TypeTableBuilder &Dest = + isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; + if (auto EC = writeRecord(Dest, R, MappedAllIndices)) + return EC; + + ++CurIndex; + assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && + "visitKnownRecord should add one index map entry"); + return Error::success(); +} + +bool TypeStreamMerger::remapIndices(RemappedType &Record, + ArrayRef Refs) { + ArrayRef OriginalData = Record.OriginalRecord.content(); + bool Success = true; + for (auto &Ref : Refs) { + uint32_t Offset = Ref.Offset; + ArrayRef Bytes = OriginalData.slice(Ref.Offset, sizeof(TypeIndex)); + ArrayRef TIs(reinterpret_cast(Bytes.data()), + Ref.Count); + for (auto TI : TIs) { + TypeIndex NewTI = TI; + bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef) + ? 
remapItemIndex(NewTI) + : remapTypeIndex(NewTI); + if (ThisSuccess && NewTI != TI) + Record.Mappings.emplace_back(Offset, NewTI); + Offset += sizeof(TypeIndex); + Success &= ThisSuccess; + } + } + return Success; } Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, - TypeServerHandler *Handler, - const CVTypeArray &Types) { - TypeStreamMerger M(SourceToDest, Handler); + const CVTypeArray &Types) { + TypeStreamMerger M(SourceToDest); return M.mergeTypeRecords(Dest, Types); } Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, ArrayRef TypeSourceToDest, SmallVectorImpl &SourceToDest, - const CVTypeArray &Ids) { - TypeStreamMerger M(SourceToDest, nullptr); + const CVTypeArray &Ids) { + TypeStreamMerger M(SourceToDest); return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); } Error llvm::codeview::mergeTypeAndIdRecords( TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, - SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, - const CVTypeArray &IdsAndTypes) { - - TypeStreamMerger M(SourceToDest, Handler); + SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes) { + TypeStreamMerger M(SourceToDest); return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); } diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 0a10e6b78911..6cf44ffa3796 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -24,22 +24,166 @@ using namespace llvm; using namespace dwarf; using namespace object; -void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, - DWARFAttribute &AttrValue) { +bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, + uint32_t *Offset, unsigned UnitIndex, + uint8_t &UnitType, bool &isUnitDWARF64) { + uint32_t AbbrOffset, Length; + uint8_t AddrSize = 0; + uint16_t Version; + bool Success = true; + + bool ValidLength = false; + bool 
ValidVersion = false; + bool ValidAddrSize = false; + bool ValidType = true; + bool ValidAbbrevOffset = true; + + uint32_t OffsetStart = *Offset; + Length = DebugInfoData.getU32(Offset); + if (Length == UINT32_MAX) { + isUnitDWARF64 = true; + OS << format( + "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n", + UnitIndex); + return false; + } + Version = DebugInfoData.getU16(Offset); + + if (Version >= 5) { + UnitType = DebugInfoData.getU8(Offset); + AddrSize = DebugInfoData.getU8(Offset); + AbbrOffset = DebugInfoData.getU32(Offset); + ValidType = DWARFUnit::isValidUnitType(UnitType); + } else { + UnitType = 0; + AbbrOffset = DebugInfoData.getU32(Offset); + AddrSize = DebugInfoData.getU8(Offset); + } + + if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset)) + ValidAbbrevOffset = false; + + ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3); + ValidVersion = DWARFContext::isSupportedVersion(Version); + ValidAddrSize = AddrSize == 4 || AddrSize == 8; + if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || + !ValidType) { + Success = false; + OS << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, OffsetStart); + if (!ValidLength) + OS << "\tError: The length for this unit is too " + "large for the .debug_info provided.\n"; + if (!ValidVersion) + OS << "\tError: The 16 bit unit header version is not valid.\n"; + if (!ValidType) + OS << "\tError: The unit type encoding is not valid.\n"; + if (!ValidAbbrevOffset) + OS << "\tError: The offset into the .debug_abbrev section is " + "not valid.\n"; + if (!ValidAddrSize) + OS << "\tError: The address size is unsupported.\n"; + } + *Offset = OffsetStart + Length + 4; + return Success; +} + +bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit) { + uint32_t NumUnitErrors = 0; + unsigned NumDies = Unit.getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = Unit.getDIEAtIndex(I); + if (Die.getTag() == DW_TAG_null) + continue; + 
for (auto AttrValue : Die.attributes()) { + NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue); + NumUnitErrors += verifyDebugInfoForm(Die, AttrValue); + } + } + return NumUnitErrors == 0; +} + +bool DWARFVerifier::handleDebugInfo() { + OS << "Verifying .debug_info Unit Header Chain...\n"; + + DWARFDataExtractor DebugInfoData(DCtx.getInfoSection(), DCtx.isLittleEndian(), + 0); + uint32_t NumDebugInfoErrors = 0; + uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0; + uint8_t UnitType = 0; + bool isUnitDWARF64 = false; + bool isHeaderChainValid = true; + bool hasDIE = DebugInfoData.isValidOffset(Offset); + while (hasDIE) { + OffsetStart = Offset; + if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType, + isUnitDWARF64)) { + isHeaderChainValid = false; + if (isUnitDWARF64) + break; + } else { + std::unique_ptr Unit; + switch (UnitType) { + case dwarf::DW_UT_type: + case dwarf::DW_UT_split_type: { + DWARFUnitSection TUSection{}; + Unit.reset(new DWARFTypeUnit( + DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(), + &DCtx.getRangeSection(), DCtx.getStringSection(), + DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(), + DCtx.getLineSection(), DCtx.isLittleEndian(), false, TUSection, + nullptr)); + break; + } + case dwarf::DW_UT_skeleton: + case dwarf::DW_UT_split_compile: + case dwarf::DW_UT_compile: + case dwarf::DW_UT_partial: + // UnitType = 0 means that we are + // verifying a compile unit in DWARF v4. 
+ case 0: { + DWARFUnitSection CUSection{}; + Unit.reset(new DWARFCompileUnit( + DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(), + &DCtx.getRangeSection(), DCtx.getStringSection(), + DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(), + DCtx.getLineSection(), DCtx.isLittleEndian(), false, CUSection, + nullptr)); + break; + } + default: { llvm_unreachable("Invalid UnitType."); } + } + Unit->extract(DebugInfoData, &OffsetStart); + if (!verifyUnitContents(*Unit)) + ++NumDebugInfoErrors; + } + hasDIE = DebugInfoData.isValidOffset(Offset); + ++UnitIdx; + } + if (UnitIdx == 0 && !hasDIE) { + OS << "Warning: .debug_info is empty.\n"; + isHeaderChainValid = true; + } + NumDebugInfoErrors += verifyDebugInfoReferences(); + return (isHeaderChainValid && NumDebugInfoErrors == 0); +} + +unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + unsigned NumErrors = 0; const auto Attr = AttrValue.Attr; switch (Attr) { case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_AT_ranges offset is beyond .debug_ranges " "bounds:\n"; Die.dump(OS, 0); OS << "\n"; } } else { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; Die.dump(OS, 0); OS << "\n"; @@ -49,7 +193,7 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, // Make sure the offset in the DW_AT_stmt_list attribute is valid. 
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DCtx.getLineSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_AT_stmt_list offset is beyond .debug_line " "bounds: " << format("0x%08" PRIx32, *SectionOffset) << "\n"; @@ -57,7 +201,7 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, OS << "\n"; } } else { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; Die.dump(OS, 0); OS << "\n"; @@ -67,10 +211,12 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, default: break; } + return NumErrors; } -void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, - DWARFAttribute &AttrValue) { +unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + unsigned NumErrors = 0; const auto Form = AttrValue.Value.getForm(); switch (Form) { case DW_FORM_ref1: @@ -86,7 +232,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); auto CUOffset = AttrValue.Value.getRawUValue(); if (CUOffset >= CUSize) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: " << FormEncodingString(Form) << " CU offset " << format("0x%08" PRIx32, CUOffset) << " is invalid (must be less than CU size of " @@ -108,7 +254,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, assert(RefVal); if (RefVal) { if (*RefVal >= DCtx.getInfoSection().Data.size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_FORM_ref_addr offset beyond .debug_info " "bounds:\n"; Die.dump(OS, 0); @@ -125,7 +271,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, auto SecOffset = AttrValue.Value.getAsSectionOffset(); assert(SecOffset); // DW_FORM_strp is a section offset. 
if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; Die.dump(OS, 0); OS << "\n"; @@ -135,17 +281,19 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, default: break; } + return NumErrors; } -void DWARFVerifier::verifyDebugInfoReferences() { +unsigned DWARFVerifier::verifyDebugInfoReferences() { // Take all references and make sure they point to an actual DIE by // getting the DIE by offset and emitting an error OS << "Verifying .debug_info references...\n"; + unsigned NumErrors = 0; for (auto Pair : ReferenceToDIEOffsets) { auto Die = DCtx.getDIEForOffset(Pair.first); if (Die) continue; - ++NumDebugInfoErrors; + ++NumErrors; OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) << ". Offset is in between DIEs:\n"; for (auto Offset : Pair.second) { @@ -155,26 +303,7 @@ void DWARFVerifier::verifyDebugInfoReferences() { } OS << "\n"; } -} - -bool DWARFVerifier::handleDebugInfo() { - NumDebugInfoErrors = 0; - OS << "Verifying .debug_info...\n"; - for (const auto &CU : DCtx.compile_units()) { - unsigned NumDies = CU->getNumDIEs(); - for (unsigned I = 0; I < NumDies; ++I) { - auto Die = CU->getDIEAtIndex(I); - const auto Tag = Die.getTag(); - if (Tag == DW_TAG_null) - continue; - for (auto AttrValue : Die.attributes()) { - verifyDebugInfoAttribute(Die, AttrValue); - verifyDebugInfoForm(Die, AttrValue); - } - } - } - verifyDebugInfoReferences(); - return NumDebugInfoErrors == 0; + return NumErrors; } void DWARFVerifier::verifyDebugLineStmtOffsets() { diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 0b48a366bd24..4c59d2f2a9d9 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -125,16 +125,16 @@ PrivateGetDIAValue(IDiaSymbol *Symbol, return Result8; } -PDB_UniqueId 
+codeview::GUID PrivateGetDIAValue(IDiaSymbol *Symbol, HRESULT (__stdcall IDiaSymbol::*Method)(GUID *)) { GUID Result; if (S_OK != (Symbol->*Method)(&Result)) - return PDB_UniqueId(); + return codeview::GUID(); - static_assert(sizeof(PDB_UniqueId) == sizeof(GUID), - "PDB_UniqueId is the wrong size!"); - PDB_UniqueId IdResult; + static_assert(sizeof(codeview::GUID) == sizeof(GUID), + "GUID is the wrong size!"); + codeview::GUID IdResult; ::memcpy(&IdResult, &Result, sizeof(GUID)); return IdResult; } @@ -746,7 +746,7 @@ PDB_SymType DIARawSymbol::getSymTag() const { &IDiaSymbol::get_symTag); } -PDB_UniqueId DIARawSymbol::getGuid() const { +codeview::GUID DIARawSymbol::getGuid() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_guid); } diff --git a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp index 789f3b813170..4fcecb92fd15 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp @@ -26,6 +26,8 @@ class GenericErrorCategory : public std::error_category { switch (static_cast(Condition)) { case generic_error_code::unspecified: return "An unknown error has occurred."; + case generic_error_code::type_server_not_found: + return "Type server PDB was not found."; case generic_error_code::dia_sdk_not_present: return "LLVM was not compiled with support for DIA. 
This usually means " "that you are are not using MSVC, or your Visual Studio " diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp index 21b66b3e7bcf..829879060c33 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -118,7 +118,7 @@ uint32_t InfoStream::getSignature() const { return Signature; } uint32_t InfoStream::getAge() const { return Age; } -PDB_UniqueId InfoStream::getGuid() const { return Guid; } +GUID InfoStream::getGuid() const { return Guid; } uint32_t InfoStream::getNamedStreamMapByteSize() const { return NamedStreamMapByteSize; diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp index 707128f7efd4..6450ae752f96 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -34,7 +34,7 @@ void InfoStreamBuilder::setSignature(uint32_t S) { Sig = S; } void InfoStreamBuilder::setAge(uint32_t A) { Age = A; } -void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; } +void InfoStreamBuilder::setGuid(GUID G) { Guid = G; } void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) { Features.push_back(Sig); diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index cb0830f453c8..3241000b06db 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -56,12 +56,12 @@ std::string NativeExeSymbol::getSymbolsFileName() const { return File.getFilePath(); } -PDB_UniqueId NativeExeSymbol::getGuid() const { +codeview::GUID NativeExeSymbol::getGuid() const { auto IS = File.getPDBInfoStream(); if (IS) return IS->getGuid(); consumeError(IS.takeError()); - return PDB_UniqueId{{0}}; + return codeview::GUID{{0}}; } 
bool NativeExeSymbol::hasCTypes() const { diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 92612bcea4ac..df3f418052a9 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -323,9 +323,7 @@ PDB_SymType NativeRawSymbol::getSymTag() const { return PDB_SymType::None; } -PDB_UniqueId NativeRawSymbol::getGuid() const { - return PDB_UniqueId{{0}}; -} +codeview::GUID NativeRawSymbol::getGuid() const { return codeview::GUID{{0}}; } int32_t NativeRawSymbol::getOffset() const { return 0; diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp deleted file mode 100644 index 9fd90102f72c..000000000000 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp +++ /dev/null @@ -1,126 +0,0 @@ -//===- PDBTypeServerHandler.cpp ---------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Handles CodeView LF_TYPESERVER2 records by attempting to locate a matching -// PDB file, then loading the PDB file and visiting all types from the -// referenced PDB using the original supplied visitor. -// -// The net effect of this is that when visiting a PDB containing a TypeServer -// record, the TypeServer record is "replaced" with all of the records in -// the referenced PDB file. 
If a single instance of PDBTypeServerHandler -// encounters the same TypeServer multiple times (for example reusing one -// PDBTypeServerHandler across multiple visitations of distinct object files or -// PDB files), PDBTypeServerHandler will optionally revisit all the records -// again, or simply consume the record and do nothing. -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" - -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/PDB/GenericError.h" -#include "llvm/DebugInfo/PDB/Native/InfoStream.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/TpiStream.h" -#include "llvm/DebugInfo/PDB/PDB.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" - -using namespace llvm; -using namespace llvm::codeview; -using namespace llvm::pdb; - -static void ignoreErrors(Error EC) { - llvm::handleAllErrors(std::move(EC), [&](ErrorInfoBase &EIB) {}); -} - -PDBTypeServerHandler::PDBTypeServerHandler(bool RevisitAlways) - : RevisitAlways(RevisitAlways) {} - -void PDBTypeServerHandler::addSearchPath(StringRef Path) { - if (Path.empty() || !sys::fs::is_directory(Path)) - return; - - SearchPaths.insert(Path); -} - -Expected -PDBTypeServerHandler::handleInternal(PDBFile &File, - TypeVisitorCallbacks &Callbacks) { - auto ExpectedTpi = File.getPDBTpiStream(); - if (!ExpectedTpi) - return ExpectedTpi.takeError(); - - // For handling a type server, we should be using whatever the callback array - // was - // that is being used for the original file. We shouldn't allow the visitor - // to - // arbitrarily stick a deserializer in there. 
- if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks, - VDS_BytesExternal)) - return std::move(EC); - - return true; -} - -Expected PDBTypeServerHandler::handle(TypeServer2Record &TS, - TypeVisitorCallbacks &Callbacks) { - if (Session) { - // If we've already handled this TypeServer and we only want to handle each - // TypeServer once, consume the record without doing anything. - if (!RevisitAlways) - return true; - - return handleInternal(Session->getPDBFile(), Callbacks); - } - - StringRef File = sys::path::filename(TS.Name); - if (File.empty()) - return make_error( - cv_error_code::corrupt_record, - "TypeServer2Record does not contain filename!"); - - for (auto &Path : SearchPaths) { - SmallString<64> PathStr = Path.getKey(); - sys::path::append(PathStr, File); - if (!sys::fs::exists(PathStr)) - continue; - - std::unique_ptr ThisSession; - if (auto EC = loadDataForPDB(PDB_ReaderType::Native, PathStr, ThisSession)) { - // It is not an error if this PDB fails to load, it just means that it - // doesn't match and we should continue searching. - ignoreErrors(std::move(EC)); - continue; - } - - std::unique_ptr NS( - static_cast(ThisSession.release())); - PDBFile &File = NS->getPDBFile(); - auto ExpectedInfo = File.getPDBInfoStream(); - // All PDB Files should have an Info stream. - if (!ExpectedInfo) - return ExpectedInfo.takeError(); - - // Just because a file with a matching name was found and it was an actual - // PDB file doesn't mean it matches. For it to match the InfoStream's GUID - // must match the GUID specified in the TypeServer2 record. - ArrayRef GuidBytes(ExpectedInfo->getGuid().Guid); - StringRef GuidStr(reinterpret_cast(GuidBytes.begin()), - GuidBytes.size()); - if (GuidStr != TS.Guid) - continue; - - Session = std::move(NS); - return handleInternal(File, Callbacks); - } - - // We couldn't find a matching PDB, so let it be handled by someone else. 
- return false; -} diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp index 91b8d648fcf9..77a2d57a8369 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -11,101 +11,79 @@ #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/JamCRC.h" using namespace llvm; using namespace llvm::codeview; using namespace llvm::pdb; // Corresponds to `fUDTAnon`. -template static bool isAnonymous(T &Rec) { - StringRef Name = Rec.getName(); +static bool isAnonymous(StringRef Name) { return Name == "" || Name == "__unnamed" || Name.endswith("::") || Name.endswith("::__unnamed"); } -// Computes a hash for a given TPI record. -template -static uint32_t getTpiHash(T &Rec, ArrayRef FullRecord) { - auto Opts = static_cast(Rec.getOptions()); - - bool ForwardRef = - Opts & static_cast(ClassOptions::ForwardReference); - bool Scoped = Opts & static_cast(ClassOptions::Scoped); - bool UniqueName = Opts & static_cast(ClassOptions::HasUniqueName); - bool IsAnon = UniqueName && isAnonymous(Rec); +// Computes the hash for a user-defined type record. This could be a struct, +// class, union, or enum. 
+static uint32_t getHashForUdt(const TagRecord &Rec, + ArrayRef FullRecord) { + ClassOptions Opts = Rec.getOptions(); + bool ForwardRef = bool(Opts & ClassOptions::ForwardReference); + bool Scoped = bool(Opts & ClassOptions::Scoped); + bool HasUniqueName = bool(Opts & ClassOptions::HasUniqueName); + bool IsAnon = HasUniqueName && isAnonymous(Rec.getName()); if (!ForwardRef && !Scoped && !IsAnon) return hashStringV1(Rec.getName()); - if (!ForwardRef && UniqueName && !IsAnon) + if (!ForwardRef && HasUniqueName && !IsAnon) return hashStringV1(Rec.getUniqueName()); return hashBufferV8(FullRecord); } -template static uint32_t getSourceLineHash(T &Rec) { +template +static Expected getHashForUdt(const CVType &Rec) { + T Deserialized; + if (auto E = TypeDeserializer::deserializeAs(const_cast(Rec), + Deserialized)) + return std::move(E); + return getHashForUdt(Deserialized, Rec.data()); +} + +template +static Expected getSourceLineHash(const CVType &Rec) { + T Deserialized; + if (auto E = TypeDeserializer::deserializeAs(const_cast(Rec), + Deserialized)) + return std::move(E); char Buf[4]; - support::endian::write32le(Buf, Rec.getUDT().getIndex()); + support::endian::write32le(Buf, Deserialized.getUDT().getIndex()); return hashStringV1(StringRef(Buf, 4)); } -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, - UdtSourceLineRecord &Rec) { - CVR.Hash = getSourceLineHash(Rec); -} +Expected llvm::pdb::hashTypeRecord(const CVType &Rec) { + switch (Rec.kind()) { + case LF_CLASS: + case LF_STRUCTURE: + case LF_INTERFACE: + return getHashForUdt(Rec); + case LF_UNION: + return getHashForUdt(Rec); + case LF_ENUM: + return getHashForUdt(Rec); -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, - UdtModSourceLineRecord &Rec) { - CVR.Hash = getSourceLineHash(Rec); -} + case LF_UDT_SRC_LINE: + return getSourceLineHash(Rec); + case LF_UDT_MOD_SRC_LINE: + return getSourceLineHash(Rec); -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) { - CVR.Hash = 
getTpiHash(Rec, CVR.data()); -} + default: + break; + } -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) { - CVR.Hash = getTpiHash(Rec, CVR.data()); -} - -void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) { - CVR.Hash = getTpiHash(Rec, CVR.data()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) { - return verifySourceLine(Rec.getUDT()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, - UdtModSourceLineRecord &Rec) { - return verifySourceLine(Rec.getUDT()); -} - -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} -Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) { - if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} - -Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) { - char Buf[4]; - support::endian::write32le(Buf, TI.getIndex()); - uint32_t Hash = hashStringV1(StringRef(Buf, 4)); - if (Hash % NumHashBuckets != HashValues[Index]) - return errorInvalidHash(); - return Error::success(); -} - -Error TpiHashVerifier::visitTypeBegin(CVType &Rec) { - ++Index; - RawRecord = Rec; - return Error::success(); + // Run CRC32 over the bytes. This corresponds to `hashBufv8`. 
+ JamCRC JC(/*Init=*/0U); + ArrayRef Bytes(reinterpret_cast(Rec.data().data()), + Rec.data().size()); + JC.update(Bytes); + return JC.getCRC(); } diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp index f917ef91f639..d3ef87d9009d 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp index faf1142ddf17..c291185bc67a 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp @@ -260,12 +260,6 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, return OS; } -raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UniqueId &Guid) { - codeview::detail::GuidAdapter A(Guid.Guid); - A.format(OS, ""); - return OS; -} - raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UdtType &Type) { switch (Type) { CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS) diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index adca0eeb08b4..43461de4c491 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -288,7 +288,6 @@ class RuntimeDyldMachOARM HalfDiffKindBits); addRelocationForSection(R, SectionAID); - addRelocationForSection(R, SectionBID); return ++RelI; } diff --git 
a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index 80371780fb6d..170bc544d53f 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -365,7 +365,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; case CallingConv::PTX_Device: Out << "ptx_device"; break; case CallingConv::X86_64_SysV: Out << "x86_64_sysvcc"; break; - case CallingConv::X86_64_Win64: Out << "x86_64_win64cc"; break; + case CallingConv::Win64: Out << "win64cc"; break; case CallingConv::SPIR_FUNC: Out << "spir_func"; break; case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; case CallingConv::Swift: Out << "swiftcc"; break; @@ -1964,6 +1964,7 @@ static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N, Printer.printString("name", N->getName()); Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); Printer.printMetadata("entity", N->getRawEntity()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Out << ")"; } diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp index e31779c83e3a..f56fe7089807 100644 --- a/contrib/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm/lib/IR/Constants.cpp @@ -44,8 +44,8 @@ bool Constant::isNegativeZeroValue() const { // Equivalent for a vector of -0.0's. if (const ConstantDataVector *CV = dyn_cast(this)) - if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) - if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + if (CV->getElementType()->isFloatingPointTy() && CV->isSplat()) + if (CV->getElementAsAPFloat(0).isNegZero()) return true; if (const ConstantVector *CV = dyn_cast(this)) @@ -70,8 +70,8 @@ bool Constant::isZeroValue() const { // Equivalent for a vector of -0.0's. 
if (const ConstantDataVector *CV = dyn_cast(this)) - if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) - if (SplatCFP && SplatCFP->isZero()) + if (CV->getElementType()->isFloatingPointTy() && CV->isSplat()) + if (CV->getElementAsAPFloat(0).isZero()) return true; if (const ConstantVector *CV = dyn_cast(this)) @@ -113,9 +113,13 @@ bool Constant::isAllOnesValue() const { return Splat->isAllOnesValue(); // Check for constant vectors which are splats of -1 values. - if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isAllOnesValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return CV->getElementAsAPFloat(0).bitcastToAPInt().isAllOnesValue(); + return CV->getElementAsAPInt(0).isAllOnesValue(); + } + } return false; } @@ -135,9 +139,13 @@ bool Constant::isOneValue() const { return Splat->isOneValue(); // Check for constant vectors which are splats of 1 values. - if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isOneValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return CV->getElementAsAPFloat(0).bitcastToAPInt().isOneValue(); + return CV->getElementAsAPInt(0).isOneValue(); + } + } return false; } @@ -157,9 +165,13 @@ bool Constant::isMinSignedValue() const { return Splat->isMinSignedValue(); // Check for constant vectors which are splats of INT_MIN values. 
- if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isMinSignedValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue(); + return CV->getElementAsAPInt(0).isMinSignedValue(); + } + } return false; } @@ -179,9 +191,13 @@ bool Constant::isNotMinSignedValue() const { return Splat->isNotMinSignedValue(); // Check for constant vectors which are splats of INT_MIN values. - if (const ConstantDataVector *CV = dyn_cast(this)) - if (Constant *Splat = CV->getSplatValue()) - return Splat->isNotMinSignedValue(); + if (const ConstantDataVector *CV = dyn_cast(this)) { + if (CV->isSplat()) { + if (CV->getElementType()->isFloatingPointTy()) + return !CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue(); + return !CV->getElementAsAPInt(0).isMinSignedValue(); + } + } // It *may* contain INT_MIN, we can't tell. return false; @@ -2565,6 +2581,34 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { } } +APInt ConstantDataSequential::getElementAsAPInt(unsigned Elt) const { + assert(isa(getElementType()) && + "Accessor can only be used when element is an integer"); + const char *EltPtr = getElementPointer(Elt); + + // The data is stored in host byte order, make sure to cast back to the right + // type to load with the right endianness. 
+ switch (getElementType()->getIntegerBitWidth()) { + default: llvm_unreachable("Invalid bitwidth for CDS"); + case 8: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(8, EltVal); + } + case 16: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(16, EltVal); + } + case 32: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(32, EltVal); + } + case 64: { + auto EltVal = *reinterpret_cast(EltPtr); + return APInt(64, EltVal); + } + } +} + APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { const char *EltPtr = getElementPointer(Elt); @@ -2623,17 +2667,21 @@ bool ConstantDataSequential::isCString() const { return Str.drop_back().find(0) == StringRef::npos; } -Constant *ConstantDataVector::getSplatValue() const { +bool ConstantDataVector::isSplat() const { const char *Base = getRawDataValues().data(); // Compare elements 1+ to the 0'th element. unsigned EltSize = getElementByteSize(); for (unsigned i = 1, e = getNumElements(); i != e; ++i) if (memcmp(Base, Base+i*EltSize, EltSize)) - return nullptr; + return false; + return true; +} + +Constant *ConstantDataVector::getSplatValue() const { // If they're all the same, return the 0th one as a representative. - return getElementAsConstant(0); + return isSplat() ? 
getElementAsConstant(0) : nullptr; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 2165ae5a9470..aba770457e2f 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -14,7 +14,6 @@ #include "llvm-c/Core.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp index 7e598b43ac16..bce28ba3b950 100644 --- a/contrib/llvm/lib/IR/DIBuilder.cpp +++ b/contrib/llvm/lib/IR/DIBuilder.cpp @@ -148,10 +148,13 @@ DICompileUnit *DIBuilder::createCompileUnit( static DIImportedEntity * createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context, - Metadata *NS, unsigned Line, StringRef Name, + Metadata *NS, DIFile *File, unsigned Line, StringRef Name, SmallVectorImpl &AllImportedModules) { + if (Line) + assert(File && "Source location has line number but no file"); unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size(); - auto *M = DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), Line, Name); + auto *M = + DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), File, Line, Name); if (EntitiesCount < C.pImpl->DIImportedEntitys.size()) // A new Imported Entity was just added to the context. // Add it to the Imported Modules list. 
@@ -160,33 +163,38 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context, } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, - DINamespace *NS, + DINamespace *NS, DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, NS, Line, StringRef(), AllImportedModules); + Context, NS, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIImportedEntity *NS, - unsigned Line) { + DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, NS, Line, StringRef(), AllImportedModules); + Context, NS, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M, - unsigned Line) { + DIFile *File, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, - Context, M, Line, StringRef(), AllImportedModules); + Context, M, File, Line, StringRef(), + AllImportedModules); } DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context, DINode *Decl, + DIFile *File, unsigned Line, StringRef Name) { // Make sure to use the unique identifier based metadata reference for // types that have one. 
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, Decl, Line, Name, AllImportedModules); + Context, Decl, File, Line, Name, + AllImportedModules); } DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory, diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp index 0bf68b4c53bb..c14940bad45d 100644 --- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp @@ -760,12 +760,13 @@ DIObjCProperty *DIObjCProperty::getImpl( DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity, - unsigned Line, MDString *Name, - StorageType Storage, + Metadata *File, unsigned Line, + MDString *Name, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DIImportedEntity, (Tag, Scope, Entity, Line, Name)); - Metadata *Ops[] = {Scope, Entity, Name}; + DEFINE_GETIMPL_LOOKUP(DIImportedEntity, + (Tag, Scope, Entity, File, Line, Name)); + Metadata *Ops[] = {Scope, Entity, Name, File}; DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops); } diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp index 9bd0e297f4ef..4d7e3040ecd7 100644 --- a/contrib/llvm/lib/IR/Dominators.cpp +++ b/contrib/llvm/lib/IR/Dominators.cpp @@ -61,24 +61,30 @@ bool BasicBlockEdge::isSingleEdge() const { //===----------------------------------------------------------------------===// template class llvm::DomTreeNodeBase; -template class llvm::DominatorTreeBase; +template class llvm::DominatorTreeBase; // DomTreeBase +template class llvm::DominatorTreeBase; // PostDomTreeBase -template void llvm::DomTreeBuilder::Calculate( - DominatorTreeBase< - typename std::remove_pointer::NodeRef>::type> - &DT, - Function &F); -template void llvm::DomTreeBuilder::Calculate>( - DominatorTreeBase>::NodeRef>::type> &DT, - Function &F); -template 
bool llvm::DomTreeBuilder::Verify( - const DominatorTreeBase< - typename std::remove_pointer::NodeRef>::type> - &DT); -template bool llvm::DomTreeBuilder::Verify>( - const DominatorTreeBase>::NodeRef>::type> &DT); +template void +llvm::DomTreeBuilder::Calculate( + DomTreeBuilder::BBDomTree &DT, Function &F); +template void +llvm::DomTreeBuilder::Calculate( + DomTreeBuilder::BBPostDomTree &DT, Function &F); + +template void llvm::DomTreeBuilder::InsertEdge( + DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To); +template void llvm::DomTreeBuilder::InsertEdge( + DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To); + +template void llvm::DomTreeBuilder::DeleteEdge( + DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To); +template void llvm::DomTreeBuilder::DeleteEdge( + DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To); + +template bool llvm::DomTreeBuilder::Verify( + const DomTreeBuilder::BBDomTree &DT); +template bool llvm::DomTreeBuilder::Verify( + const DomTreeBuilder::BBPostDomTree &DT); bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &) { diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index e413a4f34432..bea2c7ae8ff2 100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -990,24 +990,26 @@ template <> struct MDNodeKeyImpl { unsigned Tag; Metadata *Scope; Metadata *Entity; + Metadata *File; unsigned Line; MDString *Name; - MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, unsigned Line, - MDString *Name) - : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {} + MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, Metadata *File, + unsigned Line, MDString *Name) + : Tag(Tag), Scope(Scope), Entity(Entity), File(File), Line(Line), + Name(Name) {} MDNodeKeyImpl(const DIImportedEntity *N) : Tag(N->getTag()), 
Scope(N->getRawScope()), Entity(N->getRawEntity()), - Line(N->getLine()), Name(N->getRawName()) {} + File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()) {} bool isKeyOf(const DIImportedEntity *RHS) const { return Tag == RHS->getTag() && Scope == RHS->getRawScope() && - Entity == RHS->getRawEntity() && Line == RHS->getLine() && - Name == RHS->getRawName(); + Entity == RHS->getRawEntity() && File == RHS->getFile() && + Line == RHS->getLine() && Name == RHS->getRawName(); } unsigned getHashValue() const { - return hash_combine(Tag, Scope, Entity, Line, Name); + return hash_combine(Tag, Scope, Entity, File, Line, Name); } }; diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp index 29e2f42d3e05..995e1e570340 100644 --- a/contrib/llvm/lib/IR/LegacyPassManager.cpp +++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp @@ -625,21 +625,21 @@ void PMTopLevelManager::schedulePass(Pass *P) { checkAnalysis = false; const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(), - E = RequiredSet.end(); I != E; ++I) { + for (const AnalysisID ID : RequiredSet) { - Pass *AnalysisPass = findAnalysisPass(*I); + Pass *AnalysisPass = findAnalysisPass(ID); if (!AnalysisPass) { - const PassInfo *PI = findAnalysisPassInfo(*I); + const PassInfo *PI = findAnalysisPassInfo(ID); if (!PI) { // Pass P is not in the global PassRegistry dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; dbgs() << "Verify if there is a pass dependency cycle." 
<< "\n"; dbgs() << "Required Passes:" << "\n"; - for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(), - E = RequiredSet.end(); I2 != E && I2 != I; ++I2) { - Pass *AnalysisPass2 = findAnalysisPass(*I2); + for (const AnalysisID ID2 : RequiredSet) { + if (ID == ID2) + break; + Pass *AnalysisPass2 = findAnalysisPass(ID2); if (AnalysisPass2) { dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; } else { @@ -1070,17 +1070,15 @@ void PMDataManager::collectRequiredAndUsedAnalyses( void PMDataManager::initializeAnalysisImpl(Pass *P) { AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - for (AnalysisUsage::VectorType::const_iterator - I = AnUsage->getRequiredSet().begin(), - E = AnUsage->getRequiredSet().end(); I != E; ++I) { - Pass *Impl = findAnalysisPass(*I, true); + for (const AnalysisID ID : AnUsage->getRequiredSet()) { + Pass *Impl = findAnalysisPass(ID, true); if (!Impl) // This may be analysis pass that is initialized on the fly. // If that is not the case then it will raise an assert when it is used. 
continue; AnalysisResolver *AR = P->getResolver(); assert(AR && "Analysis Resolver is not set"); - AR->addAnalysisImplsPair(*I, Impl); + AR->addAnalysisImplsPair(ID, Impl); } } @@ -1112,21 +1110,19 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ TPM->collectLastUses(LUses, P); - for (SmallVectorImpl::iterator I = LUses.begin(), - E = LUses.end(); I != E; ++I) { + for (Pass *P : LUses) { dbgs() << "--" << std::string(Offset*2, ' '); - (*I)->dumpPassStructure(0); + P->dumpPassStructure(0); } } void PMDataManager::dumpPassArguments() const { - for (SmallVectorImpl::const_iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) { - if (PMDataManager *PMD = (*I)->getAsPMDataManager()) + for (Pass *P : PassVector) { + if (PMDataManager *PMD = P->getAsPMDataManager()) PMD->dumpPassArguments(); else if (const PassInfo *PI = - TPM->findAnalysisPassInfo((*I)->getPassID())) + TPM->findAnalysisPassInfo(P->getPassID())) if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); } @@ -1255,9 +1251,8 @@ Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) { // Destructor PMDataManager::~PMDataManager() { - for (SmallVectorImpl::iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) - delete *I; + for (Pass *P : PassVector) + delete P; } //===----------------------------------------------------------------------===// @@ -1284,35 +1279,35 @@ bool BBPassManager::runOnFunction(Function &F) { bool Changed = doInitialization(F); - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + for (BasicBlock &BB : F) for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); bool LocalChanged = false; - dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName()); + dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, BB.getName()); dumpRequiredSet(BP); initializeAnalysisImpl(BP); { // If the pass crashes, remember this. 
- PassManagerPrettyStackEntry X(BP, *I); + PassManagerPrettyStackEntry X(BP, BB); TimeRegion PassTimer(getPassTimer(BP)); - LocalChanged |= BP->runOnBasicBlock(*I); + LocalChanged |= BP->runOnBasicBlock(BB); } Changed |= LocalChanged; if (LocalChanged) dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, - I->getName()); + BB.getName()); dumpPreservedSet(BP); dumpUsedSet(BP); verifyPreservedAnalysis(BP); removeNotPreservedAnalysis(BP); recordAvailableAnalysis(BP); - removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG); + removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG); } return doFinalization(F) || Changed; diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp index fdc7de6eaa34..c230a50044c7 100644 --- a/contrib/llvm/lib/IR/Module.cpp +++ b/contrib/llvm/lib/IR/Module.cpp @@ -103,7 +103,7 @@ std::unique_ptr Module::createRNG(const Pass* P) const { // store salt metadata from the Module constructor. Salt += sys::path::filename(getModuleIdentifier()); - return std::unique_ptr{new RandomNumberGenerator(Salt)}; + return std::unique_ptr(new RandomNumberGenerator(Salt)); } /// getNamedValue - Return the first global value in the module with diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp index 4034f9039dda..b052c76d1fed 100644 --- a/contrib/llvm/lib/Object/ArchiveWriter.cpp +++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp @@ -318,7 +318,8 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; - if (Symflags & object::SymbolRef::SF_Undefined) + if (Symflags & object::SymbolRef::SF_Undefined && + !(Symflags & object::SymbolRef::SF_Indirect)) continue; unsigned NameOffset = NameOS.tell(); diff --git a/contrib/llvm/lib/Object/COFFImportFile.cpp b/contrib/llvm/lib/Object/COFFImportFile.cpp index 740bf94d40e0..d1f46fdfa292 100644 --- a/contrib/llvm/lib/Object/COFFImportFile.cpp +++ 
b/contrib/llvm/lib/Object/COFFImportFile.cpp @@ -131,14 +131,14 @@ class ObjectFactory { using u32 = support::ulittle32_t; MachineTypes Machine; BumpPtrAllocator Alloc; - StringRef DLLName; + StringRef ImportName; StringRef Library; std::string ImportDescriptorSymbolName; std::string NullThunkSymbolName; public: ObjectFactory(StringRef S, MachineTypes M) - : Machine(M), DLLName(S), Library(S.drop_back(4)), + : Machine(M), ImportName(S), Library(S.drop_back(4)), ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} @@ -162,14 +162,17 @@ class ObjectFactory { // Library Format. NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, ImportType Type, ImportNameType NameType); + + // Create a weak external file which is described in PE/COFF Aux Format 3. + NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp); }; } // namespace NewArchiveMember ObjectFactory::createImportDescriptor(std::vector &Buffer) { - static const uint32_t NumberOfSections = 2; - static const uint32_t NumberOfSymbols = 7; - static const uint32_t NumberOfRelocations = 3; + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 7; + const uint32_t NumberOfRelocations = 3; // COFF Header coff_file_header Header{ @@ -181,7 +184,7 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { sizeof(coff_import_directory_table_entry) + NumberOfRelocations * sizeof(coff_relocation) + // .idata$4 - (DLLName.size() + 1)), + (ImportName.size() + 1)), u32(NumberOfSymbols), u16(0), u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : 0), @@ -189,7 +192,7 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { append(Buffer, Header); // Section Header Table - static const coff_section SectionTable[NumberOfSections] = { + const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, u32(0), u32(0), @@ -205,7 +208,7 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, u32(0), u32(0), - u32(DLLName.size() + 1), + u32(ImportName.size() + 1), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + sizeof(coff_import_directory_table_entry) + NumberOfRelocations * sizeof(coff_relocation)), @@ -219,12 +222,12 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { append(Buffer, SectionTable); // .idata$2 - static const coff_import_directory_table_entry ImportDescriptor{ + const coff_import_directory_table_entry ImportDescriptor{ u32(0), u32(0), u32(0), u32(0), u32(0), }; append(Buffer, ImportDescriptor); - static const coff_relocation RelocationTable[NumberOfRelocations] = { + const coff_relocation RelocationTable[NumberOfRelocations] = { {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), u16(getImgRelRelocation(Machine))}, {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), @@ -236,9 +239,9 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { // .idata$6 auto S = Buffer.size(); - Buffer.resize(S + DLLName.size() + 1); - memcpy(&Buffer[S], DLLName.data(), DLLName.size()); - Buffer[S + DLLName.size()] = '\0'; + Buffer.resize(S + ImportName.size() + 1); + memcpy(&Buffer[S], ImportName.data(), ImportName.size()); + Buffer[S + ImportName.size()] = '\0'; // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { @@ -302,13 +305,13 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { NullThunkSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; - return {MemoryBufferRef(F, 
DLLName)}; + return {MemoryBufferRef(F, ImportName)}; } NewArchiveMember ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { - static const uint32_t NumberOfSections = 1; - static const uint32_t NumberOfSymbols = 1; + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 1; // COFF Header coff_file_header Header{ @@ -325,7 +328,7 @@ ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { append(Buffer, Header); // Section Header Table - static const coff_section SectionTable[NumberOfSections] = { + const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, u32(0), u32(0), @@ -342,7 +345,7 @@ ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { append(Buffer, SectionTable); // .idata$3 - static const coff_import_directory_table_entry ImportDescriptor{ + const coff_import_directory_table_entry ImportDescriptor{ u32(0), u32(0), u32(0), u32(0), u32(0), }; append(Buffer, ImportDescriptor); @@ -363,12 +366,12 @@ ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { writeStringTable(Buffer, {NullImportDescriptorSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; - return {MemoryBufferRef(F, DLLName)}; + return {MemoryBufferRef(F, ImportName)}; } NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { - static const uint32_t NumberOfSections = 2; - static const uint32_t NumberOfSymbols = 1; + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 1; uint32_t VASize = is32bit(Machine) ? 
4 : 8; // COFF Header @@ -388,7 +391,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { append(Buffer, Header); // Section Header Table - static const coff_section SectionTable[NumberOfSections] = { + const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, u32(0), u32(0), @@ -445,14 +448,14 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { writeStringTable(Buffer, {NullThunkSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; - return {MemoryBufferRef{F, DLLName}}; + return {MemoryBufferRef{F, ImportName}}; } NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, uint16_t Ordinal, ImportType ImportType, ImportNameType NameType) { - size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs size_t Size = sizeof(coff_import_header) + ImpSize; char *Buf = Alloc.Allocate(Size); memset(Buf, 0, Size); @@ -471,17 +474,96 @@ NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, // Write symbol name and DLL name. 
memcpy(P, Sym.data(), Sym.size()); P += Sym.size() + 1; - memcpy(P, DLLName.data(), DLLName.size()); + memcpy(P, ImportName.data(), ImportName.size()); - return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; + return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; } -std::error_code writeImportLibrary(StringRef DLLName, StringRef Path, +NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, + StringRef Weak, bool Imp) { + std::vector Buffer; + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 5; + + // COFF Header + coff_file_header Header{ + u16(0), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), + u32(NumberOfSymbols), + u16(0), + u16(0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'}, + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}}; + append(Buffer, SectionTable); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_WEAK_EXTERNAL, + 1}, + {{{2, 0, 0, 0, 3, 0, 0, 0}}, u32(0), u16(0), u16(0), uint8_t(0), 0}, + }; + SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); + + //__imp_ String Table + if (Imp) { + SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 7; + writeStringTable(Buffer, {std::string("__imp_").append(Sym), + std::string("__imp_").append(Weak)}); + } else { + SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 1; + 
writeStringTable(Buffer, {Sym, Weak}); + } + append(Buffer, SymbolTable); + + // Copied here so we can still use writeStringTable + char *Buf = Alloc.Allocate(Buffer.size()); + memcpy(Buf, Buffer.data(), Buffer.size()); + return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)}; +} + +std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, MachineTypes Machine) { std::vector Members; - ObjectFactory OF(llvm::sys::path::filename(DLLName), Machine); + ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); std::vector ImportDescriptor; Members.push_back(OF.createImportDescriptor(ImportDescriptor)); @@ -496,6 +578,12 @@ std::error_code writeImportLibrary(StringRef DLLName, StringRef Path, if (E.Private) continue; + if (E.isWeak()) { + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); + continue; + } + ImportType ImportType = IMPORT_CODE; if (E.Data) ImportType = IMPORT_DATA; diff --git a/contrib/llvm/lib/Object/COFFModuleDefinition.cpp b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp index 0d69cb6b709c..ed9140d1fe08 100644 --- a/contrib/llvm/lib/Object/COFFModuleDefinition.cpp +++ b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp @@ -22,6 +22,7 @@ #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/Error.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" using namespace llvm::COFF; @@ -55,8 +56,10 @@ struct Token { StringRef Value; }; -static bool isDecorated(StringRef Sym) { - return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +static bool isDecorated(StringRef Sym, bool MingwDef) { + // mingw does not prepend "_". 
+ return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") || + Sym.startswith("?"); } static Error createError(const Twine &Err) { @@ -83,6 +86,9 @@ class Lexer { } case '=': Buf = Buf.drop_front(); + // GNU dlltool accepts both = and ==. + if (Buf.startswith("=")) + Buf = Buf.drop_front(); return Token(Equal, "="); case ',': Buf = Buf.drop_front(); @@ -120,7 +126,8 @@ class Lexer { class Parser { public: - explicit Parser(StringRef S, MachineTypes M) : Lex(S), Machine(M) {} + explicit Parser(StringRef S, MachineTypes M, bool B) + : Lex(S), Machine(M), MingwDef(B) {} Expected parse() { do { @@ -181,14 +188,17 @@ class Parser { std::string Name; if (Error Err = parseName(&Name, &Info.ImageBase)) return Err; - // Append the appropriate file extension if not already present. - StringRef Ext = IsDll ? ".dll" : ".exe"; - if (!StringRef(Name).endswith_lower(Ext)) - Name += Ext; + + Info.ImportName = Name; // Set the output file, but don't override /out if it was already passed. - if (Info.OutputFile.empty()) + if (Info.OutputFile.empty()) { Info.OutputFile = Name; + // Append the appropriate file extension if not already present. + if (!sys::path::has_extension(Name)) + Info.OutputFile += IsDll ? 
".dll" : ".exe"; + } + return Error::success(); } case KwVersion: @@ -213,9 +223,9 @@ class Parser { } if (Machine == IMAGE_FILE_MACHINE_I386) { - if (!isDecorated(E.Name)) + if (!isDecorated(E.Name, MingwDef)) E.Name = (std::string("_").append(E.Name)); - if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) E.ExtName = (std::string("_").append(E.ExtName)); } @@ -308,11 +318,13 @@ class Parser { std::vector Stack; MachineTypes Machine; COFFModuleDefinition Info; + bool MingwDef; }; Expected parseCOFFModuleDefinition(MemoryBufferRef MB, - MachineTypes Machine) { - return Parser(MB.getBuffer(), Machine).parse(); + MachineTypes Machine, + bool MingwDef) { + return Parser(MB.getBuffer(), Machine, MingwDef).parse(); } } // namespace object diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 1e9b0c5b0454..0a2053477caf 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -227,8 +227,11 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { if (Symb.isExternal() || Symb.isWeakExternal()) Result |= SymbolRef::SF_Global; - if (Symb.isWeakExternal()) + if (Symb.isWeakExternal()) { Result |= SymbolRef::SF_Weak; + // We use indirect to allow the archiver to write weak externs + Result |= SymbolRef::SF_Indirect; + } if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) Result |= SymbolRef::SF_Absolute; diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp index 0b2ea61c5fe0..81046b217862 100644 --- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -141,6 +141,33 @@ template struct MemberRecordImpl : public MemberRecordBase { } // end namespace CodeViewYAML } // end namespace llvm +void ScalarTraits::output(const GUID &G, void *, llvm::raw_ostream &OS) { + OS << G; +} + 
+StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, GUID &S) { + if (Scalar.size() != 38) + return "GUID strings are 38 characters long"; + if (Scalar[0] != '{' || Scalar[37] != '}') + return "GUID is not enclosed in {}"; + if (Scalar[9] != '-' || Scalar[14] != '-' || Scalar[19] != '-' || + Scalar[24] != '-') + return "GUID sections are not properly delineated with dashes"; + + uint8_t *OutBuffer = S.Guid; + for (auto Iter = Scalar.begin(); Iter != Scalar.end();) { + if (*Iter == '-' || *Iter == '{' || *Iter == '}') { + ++Iter; + continue; + } + uint8_t Value = (llvm::hexDigitValue(*Iter++) << 4); + Value |= llvm::hexDigitValue(*Iter++); + *OutBuffer++ = Value; + } + + return ""; +} + void ScalarTraits::output(const TypeIndex &S, void *, raw_ostream &OS) { OS << S.getIndex(); diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp index bcd365236e46..f3b438e829d6 100644 --- a/contrib/llvm/lib/Option/OptTable.cpp +++ b/contrib/llvm/lib/Option/OptTable.cpp @@ -390,27 +390,29 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { return Name; } +namespace { +struct OptionInfo { + std::string Name; + StringRef HelpText; +}; +} // namespace + static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, - std::vector> &OptionHelp) { + std::vector &OptionHelp) { OS << Title << ":\n"; // Find the maximum option length. unsigned OptionFieldWidth = 0; for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) { - // Skip titles. - if (!OptionHelp[i].second) - continue; - // Limit the amount of padding we are willing to give up for alignment. 
- unsigned Length = OptionHelp[i].first.size(); + unsigned Length = OptionHelp[i].Name.size(); if (Length <= 23) OptionFieldWidth = std::max(OptionFieldWidth, Length); } const unsigned InitialPad = 2; for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) { - const std::string &Option = OptionHelp[i].first; + const std::string &Option = OptionHelp[i].Name; int Pad = OptionFieldWidth - int(Option.size()); OS.indent(InitialPad) << Option; @@ -419,7 +421,7 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, OS << "\n"; Pad = OptionFieldWidth + InitialPad; } - OS.indent(Pad + 1) << OptionHelp[i].second << '\n'; + OS.indent(Pad + 1) << OptionHelp[i].HelpText << '\n'; } } @@ -458,8 +460,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, // Render help text into a map of group-name to a list of (option, help) // pairs. - using helpmap_ty = - std::map>>; + using helpmap_ty = std::map>; helpmap_ty GroupedOptionHelp; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { @@ -478,7 +479,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, if (const char *Text = getOptionHelpText(Id)) { const char *HelpGroup = getOptionHelpGroup(*this, Id); const std::string &OptName = getOptionHelpName(*this, Id); - GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text)); + GroupedOptionHelp[HelpGroup].push_back({OptName, Text}); } } diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp index fe69151665c6..2fd4f3ea0d45 100644 --- a/contrib/llvm/lib/Support/ErrorHandling.cpp +++ b/contrib/llvm/lib/Support/ErrorHandling.cpp @@ -45,22 +45,36 @@ static void *ErrorHandlerUserData = nullptr; static fatal_error_handler_t BadAllocErrorHandler = nullptr; static void *BadAllocErrorHandlerUserData = nullptr; +#if LLVM_ENABLE_THREADS == 1 // Mutexes to synchronize installing error handlers and calling error handlers. 
// Do not use ManagedStatic, or that may allocate memory while attempting to // report an OOM. +// +// This usage of std::mutex has to be conditionalized behind ifdefs because +// of this script: +// compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +// That script attempts to statically link the LLVM symbolizer library with the +// STL and hide all of its symbols with 'opt -internalize'. To reduce size, it +// cuts out the threading portions of the hermetic copy of libc++ that it +// builds. We can remove these ifdefs if that script goes away. static std::mutex ErrorHandlerMutex; static std::mutex BadAllocErrorHandlerMutex; +#endif void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { +#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(ErrorHandlerMutex); +#endif assert(!ErrorHandler && "Error handler already registered!\n"); ErrorHandler = handler; ErrorHandlerUserData = user_data; } void llvm::remove_fatal_error_handler() { +#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(ErrorHandlerMutex); +#endif ErrorHandler = nullptr; ErrorHandlerUserData = nullptr; } @@ -83,7 +97,9 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { { // Only acquire the mutex while reading the handler, so as not to invoke a // user-supplied callback under a lock. 
+#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(ErrorHandlerMutex); +#endif handler = ErrorHandler; handlerData = ErrorHandlerUserData; } @@ -112,14 +128,18 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, void *user_data) { +#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif assert(!ErrorHandler && "Bad alloc error handler already registered!\n"); BadAllocErrorHandler = handler; BadAllocErrorHandlerUserData = user_data; } void llvm::remove_bad_alloc_error_handler() { +#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif BadAllocErrorHandler = nullptr; BadAllocErrorHandlerUserData = nullptr; } @@ -130,7 +150,9 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { { // Only acquire the mutex while reading the handler, so as not to invoke a // user-supplied callback under a lock. +#if LLVM_ENABLE_THREADS == 1 std::lock_guard Lock(BadAllocErrorHandlerMutex); +#endif Handler = BadAllocErrorHandler; HandlerData = BadAllocErrorHandlerUserData; } diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 9f22f89b3c9e..5cf0316d4d71 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -250,6 +250,8 @@ StringRef sys::detail::getHostCPUNameForS390x( Pos += sizeof("machine = ") - 1; unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { + if (Id >= 3906 && HaveVectorSupport) + return "z14"; if (Id >= 2964 && HaveVectorSupport) return "z13"; if (Id >= 2827) @@ -460,8 +462,8 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) // gcc 
doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" @@ -470,6 +472,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#else + return true; +#endif #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); @@ -481,35 +493,6 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, #else return true; #endif -#elif defined(__i386__) || defined(_M_IX86) -#if defined(__GNUC__) || defined(__clang__) - __asm__("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) - : "a"(value), "c"(subleaf)); - return false; -#elif defined(_MSC_VER) - __asm { - mov eax,value - mov ecx,subleaf - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } - return false; -#else - return true; -#endif -#else - return true; -#endif } // Read control register 0 (XCR0). Used to detect features such as AVX. 
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp index e58f856ca244..ea59ba62d7bd 100644 --- a/contrib/llvm/lib/Support/Path.cpp +++ b/contrib/llvm/lib/Support/Path.cpp @@ -13,8 +13,6 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp index 13bb6f23bc83..e8ef1d2fd8b9 100644 --- a/contrib/llvm/lib/Support/TargetParser.cpp +++ b/contrib/llvm/lib/Support/TargetParser.cpp @@ -452,6 +452,8 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions, Features.push_back("+ras"); if (Extensions & AArch64::AEK_LSE) Features.push_back("+lse"); + if (Extensions & AArch64::AEK_SVE) + Features.push_back("+sve"); return true; } diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp index 601084f9eae3..65eda246a7fe 100644 --- a/contrib/llvm/lib/Support/YAMLTraits.cpp +++ b/contrib/llvm/lib/Support/YAMLTraits.cpp @@ -60,6 +60,14 @@ Input::Input(StringRef InputContent, void *Ctxt, DocIterator = Strm->begin(); } +Input::Input(MemoryBufferRef Input, void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(Input, SrcMgr, false, &EC)) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); + DocIterator = Strm->begin(); +} + Input::~Input() = default; std::error_code Input::error() { return EC; } diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index 9480cd46d28f..dd58eccee957 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -326,13 +326,30 @@ raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) { } raw_ostream 
&raw_ostream::operator<<(const FormattedString &FS) { - unsigned Len = FS.Str.size(); - int PadAmount = FS.Width - Len; - if (FS.RightJustify && (PadAmount > 0)) - this->indent(PadAmount); - this->operator<<(FS.Str); - if (!FS.RightJustify && (PadAmount > 0)) + if (FS.Str.size() >= FS.Width || FS.Justify == FormattedString::JustifyNone) { + this->operator<<(FS.Str); + return *this; + } + const size_t Difference = FS.Width - FS.Str.size(); + switch (FS.Justify) { + case FormattedString::JustifyLeft: + this->operator<<(FS.Str); + this->indent(Difference); + break; + case FormattedString::JustifyRight: + this->indent(Difference); + this->operator<<(FS.Str); + break; + case FormattedString::JustifyCenter: { + int PadAmount = Difference / 2; this->indent(PadAmount); + this->operator<<(FS.Str); + this->indent(Difference - PadAmount); + break; + } + default: + llvm_unreachable("Bad Justification"); + } return *this; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h index 37b9690d0434..1dda746a6be1 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64.h @@ -44,6 +44,8 @@ ModulePass *createAArch64PromoteConstantPass(); FunctionPass *createAArch64ConditionOptimizerPass(); FunctionPass *createAArch64A57FPLoadBalancing(); FunctionPass *createAArch64A53Fix835769(); +FunctionPass *createFalkorHWPFFixPass(); +FunctionPass *createFalkorMarkStridedAccessesPass(); FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); @@ -66,6 +68,8 @@ void initializeAArch64VectorByElementOptPass(PassRegistry&); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); +void initializeFalkorHWPFFixPass(PassRegistry&); +void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry&); } // end namespace llvm diff --git 
a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index 53eef79c4df3..436bf1193304 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -50,6 +50,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", "Enable Statistical Profiling extension">; +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions">; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -269,6 +272,7 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 938779d23690..291bc5ea858e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; +// Vararg functions on windows pass floats in integer registers +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, f32], CCPromoteToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + // Darwin uses a calling convention which differs in only two ways // from the standard one at this level: diff --git a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index ee54550c9900..b72f23b109d9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ 
b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -102,6 +102,10 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( case AArch64::LDADDALh: case AArch64::LDADDALs: case AArch64::LDADDALd: + case AArch64::LDCLRALb: + case AArch64::LDCLRALh: + case AArch64::LDCLRALs: + case AArch64::LDCLRALd: case AArch64::LDEORALb: case AArch64::LDEORALh: case AArch64::LDEORALs: diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp new file mode 100644 index 000000000000..c0e22355a9ff --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -0,0 +1,790 @@ +//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions +/// that may inhibit the HW prefetching. This is done in two steps. Before +/// ISel, we mark strided loads (i.e. those that will likely benefit from +/// prefetching) with metadata. Then, after opcodes have been finalized, we +/// insert MOVs and re-write loads to prevent unintentional tag collisions. 
+// ===---------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64TargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "falkor-hwpf-fix" + +STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked"); +STATISTIC(NumCollisionsAvoided, + "Number of HW prefetch tag collisions avoided"); +STATISTIC(NumCollisionsNotAvoided, + "Number of HW prefetch tag collisions not avoided due to lack of registers"); + +namespace { + +class FalkorMarkStridedAccesses { +public: + FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE) + : LI(LI), SE(SE) {} + + bool run(); + +private: + bool runOnLoop(Loop &L); + + LoopInfo &LI; + ScalarEvolution &SE; +}; + +class FalkorMarkStridedAccessesLegacy : public FunctionPass { +public: + static char ID; // Pass ID, replacement for typeid + FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) { + initializeFalkorMarkStridedAccessesLegacyPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + // FIXME: For some reason, preserving SE here breaks LSR (even if + // this pass changes nothing). 
+ // AU.addPreserved(); + } + + bool runOnFunction(Function &F) override; +}; +} // namespace + +char FalkorMarkStridedAccessesLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, + "Falkor HW Prefetch Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE, + "Falkor HW Prefetch Fix", false, false) + +FunctionPass *llvm::createFalkorMarkStridedAccessesPass() { + return new FalkorMarkStridedAccessesLegacy(); +} + +bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) { + TargetPassConfig &TPC = getAnalysis(); + const AArch64Subtarget *ST = + TPC.getTM().getSubtargetImpl(F); + if (ST->getProcFamily() != AArch64Subtarget::Falkor) + return false; + + if (skipFunction(F)) + return false; + + LoopInfo &LI = getAnalysis().getLoopInfo(); + ScalarEvolution &SE = getAnalysis().getSE(); + + FalkorMarkStridedAccesses LDP(LI, SE); + return LDP.run(); +} + +bool FalkorMarkStridedAccesses::run() { + bool MadeChange = false; + + for (Loop *L : LI) + for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt) + MadeChange |= runOnLoop(**LIt); + + return MadeChange; +} + +bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) { + // Only mark strided loads in the inner-most loop + if (!L.empty()) + return false; + + bool MadeChange = false; + + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + LoadInst *LoadI = dyn_cast(&I); + if (!LoadI) + continue; + + Value *PtrValue = LoadI->getPointerOperand(); + if (L.isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE.getSCEV(PtrValue); + const SCEVAddRecExpr *LSCEVAddRec = dyn_cast(LSCEV); + if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) + continue; + + LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD, + MDNode::get(LoadI->getContext(), {})); + ++NumStridedLoadsMarked; + DEBUG(dbgs() << "Load: " 
<< I << " marked as strided\n"); + MadeChange = true; + } + } + + return MadeChange; +} + +namespace { + +class FalkorHWPFFix : public MachineFunctionPass { +public: + static char ID; + + FalkorHWPFFix() : MachineFunctionPass(ID) { + initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + void runOnLoop(MachineLoop &L, MachineFunction &Fn); + + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + DenseMap> TagMap; + bool Modified; +}; + +/// Bits from load opcodes used to compute HW prefetcher instruction tags. +struct LoadInfo { + LoadInfo() + : DestReg(0), BaseReg(0), BaseRegIdx(-1), OffsetOpnd(nullptr), + IsPrePost(false) {} + unsigned DestReg; + unsigned BaseReg; + int BaseRegIdx; + const MachineOperand *OffsetOpnd; + bool IsPrePost; +}; + +} // namespace + +char FalkorHWPFFix::ID = 0; + +INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late", + "Falkor HW Prefetch Fix Late Phase", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late", + "Falkor HW Prefetch Fix Late Phase", false, false) + +static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) { + return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8); +} + +static Optional getLoadInfo(const MachineInstr &MI) { + int DestRegIdx; + int BaseRegIdx; + int OffsetIdx; + bool IsPrePost; + + switch (MI.getOpcode()) { + default: + return None; + + case AArch64::LD1i8: + case AArch64::LD1i16: + case AArch64::LD1i32: + case AArch64::LD1i64: + case AArch64::LD2i8: + case AArch64::LD2i16: + case AArch64::LD2i32: + case 
AArch64::LD2i64: + case AArch64::LD3i8: + case AArch64::LD3i16: + case AArch64::LD3i32: + case AArch64::LD4i8: + case AArch64::LD4i16: + case AArch64::LD4i32: + DestRegIdx = 0; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD3i64: + case AArch64::LD4i64: + DestRegIdx = -1; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1Onev1d: + case AArch64::LD1Onev2s: + case AArch64::LD1Onev4h: + case AArch64::LD1Onev8b: + case AArch64::LD1Onev2d: + case AArch64::LD1Onev4s: + case AArch64::LD1Onev8h: + case AArch64::LD1Onev16b: + case AArch64::LD1Rv1d: + case AArch64::LD1Rv2s: + case AArch64::LD1Rv4h: + case AArch64::LD1Rv8b: + case AArch64::LD1Rv2d: + case AArch64::LD1Rv4s: + case AArch64::LD1Rv8h: + case AArch64::LD1Rv16b: + case AArch64::LD1Twov1d: + case AArch64::LD1Twov2s: + case AArch64::LD1Twov4h: + case AArch64::LD1Twov8b: + case AArch64::LD2Twov2s: + case AArch64::LD2Twov4s: + case AArch64::LD2Twov8b: + case AArch64::LD2Rv1d: + case AArch64::LD2Rv2s: + case AArch64::LD2Rv4s: + case AArch64::LD2Rv8b: + DestRegIdx = 0; + BaseRegIdx = 1; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1Twov2d: + case AArch64::LD1Twov4s: + case AArch64::LD1Twov8h: + case AArch64::LD1Twov16b: + case AArch64::LD1Threev1d: + case AArch64::LD1Threev2s: + case AArch64::LD1Threev4h: + case AArch64::LD1Threev8b: + case AArch64::LD1Threev2d: + case AArch64::LD1Threev4s: + case AArch64::LD1Threev8h: + case AArch64::LD1Threev16b: + case AArch64::LD1Fourv1d: + case AArch64::LD1Fourv2s: + case AArch64::LD1Fourv4h: + case AArch64::LD1Fourv8b: + case AArch64::LD1Fourv2d: + case AArch64::LD1Fourv4s: + case AArch64::LD1Fourv8h: + case AArch64::LD1Fourv16b: + case AArch64::LD2Twov2d: + case AArch64::LD2Twov4h: + case AArch64::LD2Twov8h: + case AArch64::LD2Twov16b: + case AArch64::LD2Rv2d: + case AArch64::LD2Rv4h: + case AArch64::LD2Rv8h: + case AArch64::LD2Rv16b: + case AArch64::LD3Threev2s: + case 
AArch64::LD3Threev4h: + case AArch64::LD3Threev8b: + case AArch64::LD3Threev2d: + case AArch64::LD3Threev4s: + case AArch64::LD3Threev8h: + case AArch64::LD3Threev16b: + case AArch64::LD3Rv1d: + case AArch64::LD3Rv2s: + case AArch64::LD3Rv4h: + case AArch64::LD3Rv8b: + case AArch64::LD3Rv2d: + case AArch64::LD3Rv4s: + case AArch64::LD3Rv8h: + case AArch64::LD3Rv16b: + case AArch64::LD4Fourv2s: + case AArch64::LD4Fourv4h: + case AArch64::LD4Fourv8b: + case AArch64::LD4Fourv2d: + case AArch64::LD4Fourv4s: + case AArch64::LD4Fourv8h: + case AArch64::LD4Fourv16b: + case AArch64::LD4Rv1d: + case AArch64::LD4Rv2s: + case AArch64::LD4Rv4h: + case AArch64::LD4Rv8b: + case AArch64::LD4Rv2d: + case AArch64::LD4Rv4s: + case AArch64::LD4Rv8h: + case AArch64::LD4Rv16b: + DestRegIdx = -1; + BaseRegIdx = 1; + OffsetIdx = -1; + IsPrePost = false; + break; + + case AArch64::LD1i8_POST: + case AArch64::LD1i16_POST: + case AArch64::LD1i32_POST: + case AArch64::LD1i64_POST: + case AArch64::LD2i8_POST: + case AArch64::LD2i16_POST: + case AArch64::LD2i32_POST: + case AArch64::LD2i64_POST: + case AArch64::LD3i8_POST: + case AArch64::LD3i16_POST: + case AArch64::LD3i32_POST: + case AArch64::LD4i8_POST: + case AArch64::LD4i16_POST: + case AArch64::LD4i32_POST: + DestRegIdx = 1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = false; + break; + + case AArch64::LD3i64_POST: + case AArch64::LD4i64_POST: + DestRegIdx = -1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = false; + break; + + case AArch64::LD1Onev1d_POST: + case AArch64::LD1Onev2s_POST: + case AArch64::LD1Onev4h_POST: + case AArch64::LD1Onev8b_POST: + case AArch64::LD1Onev2d_POST: + case AArch64::LD1Onev4s_POST: + case AArch64::LD1Onev8h_POST: + case AArch64::LD1Onev16b_POST: + case AArch64::LD1Rv1d_POST: + case AArch64::LD1Rv2s_POST: + case AArch64::LD1Rv4h_POST: + case AArch64::LD1Rv8b_POST: + case AArch64::LD1Rv2d_POST: + case AArch64::LD1Rv4s_POST: + case AArch64::LD1Rv8h_POST: + case AArch64::LD1Rv16b_POST: + case 
AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov8b_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: + DestRegIdx = 1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LD1Twov2d_POST: + case AArch64::LD1Twov4s_POST: + case AArch64::LD1Twov8h_POST: + case AArch64::LD1Twov16b_POST: + case AArch64::LD1Threev1d_POST: + case AArch64::LD1Threev2s_POST: + case AArch64::LD1Threev4h_POST: + case AArch64::LD1Threev8b_POST: + case AArch64::LD1Threev2d_POST: + case AArch64::LD1Threev4s_POST: + case AArch64::LD1Threev8h_POST: + case AArch64::LD1Threev16b_POST: + case AArch64::LD1Fourv1d_POST: + case AArch64::LD1Fourv2s_POST: + case AArch64::LD1Fourv4h_POST: + case AArch64::LD1Fourv8b_POST: + case AArch64::LD1Fourv2d_POST: + case AArch64::LD1Fourv4s_POST: + case AArch64::LD1Fourv8h_POST: + case AArch64::LD1Fourv16b_POST: + case AArch64::LD2Twov2d_POST: + case AArch64::LD2Twov4h_POST: + case AArch64::LD2Twov8h_POST: + case AArch64::LD2Twov16b_POST: + case AArch64::LD2Rv2d_POST: + case AArch64::LD2Rv4h_POST: + case AArch64::LD2Rv8h_POST: + case AArch64::LD2Rv16b_POST: + case AArch64::LD3Threev2s_POST: + case AArch64::LD3Threev4h_POST: + case AArch64::LD3Threev8b_POST: + case AArch64::LD3Threev2d_POST: + case AArch64::LD3Threev4s_POST: + case AArch64::LD3Threev8h_POST: + case AArch64::LD3Threev16b_POST: + case AArch64::LD3Rv1d_POST: + case AArch64::LD3Rv2s_POST: + case AArch64::LD3Rv4h_POST: + case AArch64::LD3Rv8b_POST: + case AArch64::LD3Rv2d_POST: + case AArch64::LD3Rv4s_POST: + case AArch64::LD3Rv8h_POST: + case AArch64::LD3Rv16b_POST: + case AArch64::LD4Fourv2s_POST: + case AArch64::LD4Fourv4h_POST: + case AArch64::LD4Fourv8b_POST: + case AArch64::LD4Fourv2d_POST: + case AArch64::LD4Fourv4s_POST: + case 
AArch64::LD4Fourv8h_POST: + case AArch64::LD4Fourv16b_POST: + case AArch64::LD4Rv1d_POST: + case AArch64::LD4Rv2s_POST: + case AArch64::LD4Rv4h_POST: + case AArch64::LD4Rv8b_POST: + case AArch64::LD4Rv2d_POST: + case AArch64::LD4Rv4s_POST: + case AArch64::LD4Rv8h_POST: + case AArch64::LD4Rv16b_POST: + DestRegIdx = -1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBBui: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRBui: + case AArch64::LDRDl: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRDui: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHHui: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRHui: + case AArch64::LDRQl: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRQui: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBWui: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSBXui: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHWui: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSHXui: + case AArch64::LDRSWl: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSWui: + case AArch64::LDRSl: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRSui: + case AArch64::LDRWl: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRWui: + case AArch64::LDRXl: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::LDRXui: + case AArch64::LDURBBi: + case AArch64::LDURBi: + case AArch64::LDURDi: + case AArch64::LDURHHi: + case AArch64::LDURHi: + case AArch64::LDURQi: + case AArch64::LDURSBWi: + case AArch64::LDURSBXi: + case AArch64::LDURSHWi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::LDURSi: + case AArch64::LDURWi: + case AArch64::LDURXi: + DestRegIdx = 0; + BaseRegIdx = 1; + OffsetIdx = 2; + 
IsPrePost = false; + break; + + case AArch64::LDRBBpost: + case AArch64::LDRBBpre: + case AArch64::LDRBpost: + case AArch64::LDRBpre: + case AArch64::LDRDpost: + case AArch64::LDRDpre: + case AArch64::LDRHHpost: + case AArch64::LDRHHpre: + case AArch64::LDRHpost: + case AArch64::LDRHpre: + case AArch64::LDRQpost: + case AArch64::LDRQpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSBWpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSBXpre: + case AArch64::LDRSHWpost: + case AArch64::LDRSHWpre: + case AArch64::LDRSHXpost: + case AArch64::LDRSHXpre: + case AArch64::LDRSWpost: + case AArch64::LDRSWpre: + case AArch64::LDRSpost: + case AArch64::LDRSpre: + case AArch64::LDRWpost: + case AArch64::LDRWpre: + case AArch64::LDRXpost: + case AArch64::LDRXpre: + DestRegIdx = 1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = true; + break; + + case AArch64::LDPDi: + case AArch64::LDPQi: + DestRegIdx = -1; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDPSWi: + case AArch64::LDPSi: + case AArch64::LDPWi: + case AArch64::LDPXi: + DestRegIdx = 0; + BaseRegIdx = 2; + OffsetIdx = 3; + IsPrePost = false; + break; + + case AArch64::LDPQpost: + case AArch64::LDPQpre: + DestRegIdx = -1; + BaseRegIdx = 3; + OffsetIdx = 4; + IsPrePost = true; + break; + + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPSWpost: + case AArch64::LDPSWpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: + DestRegIdx = 1; + BaseRegIdx = 3; + OffsetIdx = 4; + IsPrePost = true; + break; + } + + LoadInfo LI; + LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg(); + LI.BaseReg = MI.getOperand(BaseRegIdx).getReg(); + LI.BaseRegIdx = BaseRegIdx; + LI.OffsetOpnd = OffsetIdx == -1 ? 
nullptr : &MI.getOperand(OffsetIdx); + LI.IsPrePost = IsPrePost; + return LI; +} + +static Optional getTag(const TargetRegisterInfo *TRI, + const MachineInstr &MI, const LoadInfo &LI) { + unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0; + unsigned Base = TRI->getEncodingValue(LI.BaseReg); + unsigned Off; + if (LI.OffsetOpnd == nullptr) + Off = 0; + else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() || + LI.OffsetOpnd->isCPI()) + return None; + else if (LI.OffsetOpnd->isReg()) + Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg()); + else + Off = LI.OffsetOpnd->getImm() >> 2; + + return makeTag(Dest, Base, Off); +} + +void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { + // Build the initial tag map for the whole loop. + TagMap.clear(); + for (MachineBasicBlock *MBB : L.getBlocks()) + for (MachineInstr &MI : *MBB) { + Optional LInfo = getLoadInfo(MI); + if (!LInfo) + continue; + Optional Tag = getTag(TRI, MI, *LInfo); + if (!Tag) + continue; + TagMap[*Tag].push_back(&MI); + } + + bool AnyCollisions = false; + for (auto &P : TagMap) { + auto Size = P.second.size(); + if (Size > 1) { + for (auto *MI : P.second) { + if (TII->isStridedAccess(*MI)) { + AnyCollisions = true; + break; + } + } + } + if (AnyCollisions) + break; + } + // Nothing to fix. + if (!AnyCollisions) + return; + + MachineRegisterInfo &MRI = Fn.getRegInfo(); + + // Go through all the basic blocks in the current loop and fix any streaming + // loads to avoid collisions with any other loads. 
+ LiveRegUnits LR(*TRI); + for (MachineBasicBlock *MBB : L.getBlocks()) { + LR.clear(); + LR.addLiveOuts(*MBB); + for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) { + MachineInstr &MI = *I; + if (!TII->isStridedAccess(MI)) + continue; + + LoadInfo LdI = *getLoadInfo(MI); + unsigned OldTag = *getTag(TRI, MI, LdI); + auto &OldCollisions = TagMap[OldTag]; + if (OldCollisions.size() <= 1) + continue; + + bool Fixed = false; + DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); + + for (unsigned ScratchReg : AArch64::GPR64RegClass) { + if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) + continue; + + LoadInfo NewLdI(LdI); + NewLdI.BaseReg = ScratchReg; + unsigned NewTag = *getTag(TRI, MI, NewLdI); + // Scratch reg tag would collide too, so don't use it. + if (TagMap.count(NewTag)) + continue; + + DEBUG(dbgs() << "Changing base reg to: " << PrintReg(ScratchReg, TRI) + << '\n'); + + // Rewrite: + // Xd = LOAD Xb, off + // to: + // Xc = MOV Xb + // Xd = LOAD Xc, off + DebugLoc DL = MI.getDebugLoc(); + BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg) + .addReg(AArch64::XZR) + .addReg(LdI.BaseReg) + .addImm(0); + MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx); + BaseOpnd.setReg(ScratchReg); + + // If the load does a pre/post increment, then insert a MOV after as + // well to update the real base register. + if (LdI.IsPrePost) { + DEBUG(dbgs() << "Doing post MOV of incremented reg: " + << PrintReg(ScratchReg, TRI) << '\n'); + MI.getOperand(0).setReg( + ScratchReg); // Change tied operand pre/post update dest. 
+ BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL, + TII->get(AArch64::ORRXrs), LdI.BaseReg) + .addReg(AArch64::XZR) + .addReg(ScratchReg) + .addImm(0); + } + + for (int I = 0, E = OldCollisions.size(); I != E; ++I) + if (OldCollisions[I] == &MI) { + std::swap(OldCollisions[I], OldCollisions[E - 1]); + OldCollisions.pop_back(); + break; + } + + // Update TagMap to reflect instruction changes to reduce the number + // of later MOVs to be inserted. This needs to be done after + // OldCollisions is updated since it may be relocated by this + // insertion. + TagMap[NewTag].push_back(&MI); + ++NumCollisionsAvoided; + Fixed = true; + Modified = true; + break; + } + if (!Fixed) + ++NumCollisionsNotAvoided; + } + } +} + +bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { + auto &ST = static_cast(Fn.getSubtarget()); + if (ST.getProcFamily() != AArch64Subtarget::Falkor) + return false; + + if (skipFunction(*Fn.getFunction())) + return false; + + TII = static_cast(ST.getInstrInfo()); + TRI = ST.getRegisterInfo(); + + assert(TRI->trackLivenessAfterRegAlloc(Fn) && + "Register liveness not available!"); + + MachineLoopInfo &LI = getAnalysis(); + + Modified = false; + + for (MachineLoop *I : LI) + for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) + // Only process inner-loops + if (L->empty()) + runOnLoop(**L, Fn); + + return Modified; +} + +FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 3682b62d2b84..97396057dce0 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -5138,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectOperator(I, I->getOpcode()); // Silence warnings. 
(void)&CC_AArch64_DarwinPCS_VarArg; + (void)&CC_AArch64_Win64_VarArg; } namespace llvm { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index e96ee7d29b3e..4907d082eda0 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -41,6 +41,10 @@ // | | // |-----------------------------------| // | | +// | (Win64 only) varargs from reg | +// | | +// |-----------------------------------| +// | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) @@ -950,7 +954,13 @@ static void computeCalleeSaveRegisterPairs( CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); - unsigned Offset = AFI->getCalleeSavedStackSize(); + int Offset = AFI->getCalleeSavedStackSize(); + + unsigned GPRSaveSize = AFI->getVarArgsGPRSize(); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + if (IsWin64) + Offset -= alignTo(GPRSaveSize, 16); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 04687847c1a3..06005f6b6886 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -239,10 +239,17 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( case InlineAsm::Constraint_i: case InlineAsm::Constraint_m: case InlineAsm::Constraint_Q: - // Require the address to be in a register. That is safe for all AArch64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. 
- OutOps.push_back(Op); + // We need to make sure that this one operand does not end up in XZR, thus + // require the address to be in a PointerRegClass register. + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); + SDLoc dl(Op); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + dl, Op.getValueType(), + Op, RC), 0); + OutOps.push_back(NewOp); return false; } return true; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 60fde5caa339..c6150f9e5d1d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2650,9 +2650,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + if (Subtarget->isTargetWindows() && IsVarArg) + return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; + case CallingConv::Win64: + return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; } } @@ -2668,6 +2672,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; @@ -2824,10 +2829,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // varargs AArch64FunctionInfo *FuncInfo = MF.getInfo(); if (isVarArg) { - if (!Subtarget->isTargetDarwin()) { + if (!Subtarget->isTargetDarwin() || IsWin64) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. + // Win64 variadic functions also pass arguments in registers, but all float + // arguments are passed in integer registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -2869,6 +2876,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); SmallVector MemOps; @@ -2881,7 +2889,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { - GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); + if (IsWin64) + GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); + else + GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); @@ -2890,7 +2901,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); + IsWin64 + ? 
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + GPRIdx, + (i - FirstVariadicGPR) * 8) + : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); @@ -2899,7 +2914,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, FuncInfo->setVarArgsGPRIndex(GPRIdx); FuncInfo->setVarArgsGPRSize(GPRSaveSize); - if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasFPARMv8() && !IsWin64) { static const MCPhysReg FPRArgRegs[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; @@ -4491,6 +4506,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, MachinePointerInfo(SV)); } +SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, + SelectionDAG &DAG) const { + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); + + SDLoc DL(Op); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0 + ? FuncInfo->getVarArgsGPRIndex() + : FuncInfo->getVarArgsStackIndex(), + getPointerTy(DAG.getDataLayout())); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call @@ -4562,8 +4592,14 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? 
LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); + MachineFunction &MF = DAG.getMachineFunction(); + + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + return LowerWin64_VASTART(Op, DAG); + else if (Subtarget->isTargetDarwin()) + return LowerDarwin_VASTART(Op, DAG); + else + return LowerAAPCS_VASTART(Op, DAG); } SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, @@ -4571,7 +4607,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; + unsigned VaListSize = + Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); @@ -7451,6 +7488,14 @@ AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy, return (DL.getTypeSizeInBits(VecTy) + 127) / 128; } +MachineMemOperand::Flags +AArch64TargetLowering::getMMOFlags(const Instruction &I) const { + if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && + I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr) + return MOStridedAccess; + return MachineMemOperand::MONone; +} + bool AArch64TargetLowering::isLegalInterleavedAccessType( VectorType *VecTy, const DataLayout &DL) const { @@ -10567,9 +10612,6 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { if (Size > 128) return AtomicExpansionKind::None; // Nand not supported in LSE. if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; - // Currently leaving And and Sub to LLSC - if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub)) - return AtomicExpansionKind::LLSC; // Leave 128 bits to LLSC. return (Subtarget->hasLSE() && Size < 128) ? 
AtomicExpansionKind::None : AtomicExpansionKind::LLSC; } @@ -10783,7 +10825,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { unsigned AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) return getPointerTy(DL).getSizeInBits(); return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index ecc2517fb288..3b0e0f1de894 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -408,6 +408,19 @@ class AArch64TargetLowering : public TargetLowering { bool isIntDivCheap(EVT VT, AttributeList Attr) const override; + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { + // Do not merge to float value size (128 bytes) if no implicit + // float attribute is set. 
+ + bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + if (NoFloat) + return (MemVT.getSizeInBits() <= 64); + return true; + } + bool isCheapToSpeculateCttz() const override { return true; } @@ -455,6 +468,8 @@ class AArch64TargetLowering : public TargetLowering { unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; + MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override; + private: bool isExtFreeImpl(const Instruction *Ext) const override; @@ -541,6 +556,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index de283b70210f..eec41ddbc159 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -451,3 +451,13 @@ def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)> def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>; def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>; def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs (SUBWrr WZR, GPR32:$Rs), 
GPR64sp:$Rn)>; +def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>; + +def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALb (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALh (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALs (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>; +def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALd (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index dba3e4bdf82f..c0c6055c358f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -52,9 +52,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -static const MachineMemOperand::Flags MOSuppressPair = - MachineMemOperand::MOTargetFlag1; - static cl::opt TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); @@ -1715,6 +1712,13 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const { (*MI.memoperands_begin())->setFlags(MOSuppressPair); } +/// Check all MachineMemOperands for a hint that the load/store is strided. 
+bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const { + return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { + return MMO->getFlags() & MOStridedAccess; + }); +} + bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const { switch (Opc) { default: @@ -4433,7 +4437,8 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { ArrayRef> AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { static const std::pair TargetFlags[] = - {{MOSuppressPair, "aarch64-suppress-pair"}}; + {{MOSuppressPair, "aarch64-suppress-pair"}, + {MOStridedAccess, "aarch64-strided-access"}}; return makeArrayRef(TargetFlags); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 0809ede4df2a..1765a0263ea4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -27,6 +27,13 @@ namespace llvm { class AArch64Subtarget; class AArch64TargetMachine; +static const MachineMemOperand::Flags MOSuppressPair = + MachineMemOperand::MOTargetFlag1; +static const MachineMemOperand::Flags MOStridedAccess = + MachineMemOperand::MOTargetFlag2; + +#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access" + class AArch64InstrInfo final : public AArch64GenInstrInfo { const AArch64RegisterInfo RI; const AArch64Subtarget &Subtarget; @@ -81,6 +88,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// unprofitable. bool isLdStPairSuppressed(const MachineInstr &MI) const; + /// Return true if the given load or store is a strided memory access. + bool isStridedAccess(const MachineInstr &MI) const; + /// Return true if this is an unscaled load/store. 
bool isUnscaledLdSt(unsigned Opc) const; @@ -356,7 +366,7 @@ enum AArch64FrameOffsetStatus { /// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to /// use an offset.eq /// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be -/// rewriten in @p MI. +/// rewritten in @p MI. /// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the /// amount that is off the limit of the legal offset. /// If set, @p OutUseUnscaledOp will contain the whether @p MI should be diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0be14673eb20..0dcf07f98412 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -37,6 +37,8 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16", "fullfp16">; def HasSPE : Predicate<"Subtarget->hasSPE()">, AssemblerPredicate<"FeatureSPE", "spe">; +def HasSVE : Predicate<"Subtarget->hasSVE()">, + AssemblerPredicate<"FeatureSVE", "sve">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 4a0a7c36baf8..ffb27834c31c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -82,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({Op, 1, s1}, Legal); } - for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV}) for (auto Ty : {s32, s64}) setAction({BinOp, Ty}, Legal); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index fab92e139dd0..9f7dcb3fe1c3 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ 
b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -74,7 +74,7 @@ const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; @@ -167,7 +167,7 @@ bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const { const TargetRegisterClass * AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { - return &AArch64::GPR64RegClass; + return &AArch64::GPR64spRegClass; } const TargetRegisterClass * diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index a3238cf3b60f..ea6112452736 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -134,7 +134,9 @@ void AArch64Subtarget::initializeProperties() { case CortexA72: PrefFunctionAlignment = 4; break; - case CortexA73: break; + case CortexA73: + PrefFunctionAlignment = 4; + break; case Others: break; } } @@ -171,7 +173,8 @@ struct AArch64GISelActualAccessor : public GISelAccessor { AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), + : AArch64GenSubtargetInfo(TT, CPU, FS), + ReserveX18(TT.isOSDarwin() || TT.isOSWindows()), IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), TLInfo(TM, *this), GISel() { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index db53946cbc77..5a1f45ee2552 100644 --- 
a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -70,6 +70,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasFullFP16 = false; bool HasSPE = false; bool HasLSLFast = false; + bool HasSVE = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove = false; @@ -251,6 +252,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasFullFP16() const { return HasFullFP16; } bool hasSPE() const { return HasSPE; } bool hasLSLFast() const { return HasLSLFast; } + bool hasSVE() const { return HasSVE; } bool isLittleEndian() const { return IsLittle; } @@ -304,6 +306,17 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool enableEarlyIfConversion() const override; std::unique_ptr getCustomPBQPConstraints() const override; + + bool isCallingConvWin64(CallingConv::ID CC) const { + switch (CC) { + case CallingConv::C: + return isTargetWindows(); + case CallingConv::Win64: + return true; + default: + return false; + } + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 6237b8f3e7b9..ba28c01a2eff 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -138,6 +138,9 @@ static cl::opt EnableGlobalISelAtO( cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), cl::init(-1)); +static cl::opt EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", + cl::init(true), cl::Hidden); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. 
RegisterTargetMachine X(getTheAArch64leTarget()); @@ -158,6 +161,8 @@ extern "C" void LLVMInitializeAArch64Target() { initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); + initializeFalkorHWPFFixPass(*PR); + initializeFalkorMarkStridedAccessesLegacyPass(*PR); initializeLDTLSCleanupPass(*PR); } @@ -182,7 +187,7 @@ static std::string computeDataLayout(const Triple &TT, if (TT.isOSBinFormatMachO()) return "e-m:o-i64:64-i128:128-n32:64-S128"; if (TT.isOSBinFormatCOFF()) - return "e-m:w-i64:64-i128:128-n32:64-S128"; + return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; @@ -346,8 +351,12 @@ void AArch64PassConfig::addIRPasses() { // // Run this before LSR to remove the multiplies involved in computing the // pointer values N iterations ahead. - if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch) - addPass(createLoopDataPrefetchPass()); + if (TM->getOptLevel() != CodeGenOpt::None) { + if (EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); + if (EnableFalkorHWPFFix) + addPass(createFalkorMarkStridedAccessesPass()); + } TargetPassConfig::addIRPasses(); @@ -478,8 +487,12 @@ void AArch64PassConfig::addPreSched2() { // Expand some pseudo instructions to allow proper scheduling. addPass(createAArch64ExpandPseudoPass()); // Use load/store pair instructions when possible. 
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) - addPass(createAArch64LoadStoreOptimizationPass()); + if (TM->getOptLevel() != CodeGenOpt::None) { + if (EnableLoadStoreOpt) + addPass(createAArch64LoadStoreOptimizationPass()); + if (EnableFalkorHWPFFix) + addPass(createFalkorHWPFFixPass()); + } } void AArch64PassConfig::addPreEmitPass() { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h index fefa7e26b79f..85de02e859e0 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -36,6 +36,8 @@ class AArch64TargetMachine : public LLVMTargetMachine { ~AArch64TargetMachine() override; const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some, targets is + // deprecated and should not be used. const AArch64Subtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index e841fb894519..a79d51820545 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -86,7 +86,7 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode); - bool showMatchError(SMLoc Loc, unsigned ErrCode); + bool showMatchError(SMLoc Loc, unsigned ErrCode, OperandVector &Operands); bool parseDirectiveArch(SMLoc L); bool parseDirectiveCPU(SMLoc L); @@ -3257,7 +3257,10 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, } } -bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { +std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS); + +bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, + 
OperandVector &Operands) { switch (ErrCode) { case Match_MissingFeature: return Error(Loc, @@ -3380,8 +3383,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { return Error(Loc, "expected readable system register"); case Match_MSR: return Error(Loc, "expected writable system register or pstate"); - case Match_MnemonicFail: - return Error(Loc, "unrecognized instruction mnemonic"); + case Match_MnemonicFail: { + std::string Suggestion = AArch64MnemonicSpellCheck( + ((AArch64Operand &)*Operands[0]).getToken(), + ComputeAvailableFeatures(STI->getFeatureBits())); + return Error(Loc, "unrecognized instruction mnemonic" + Suggestion); + } default: llvm_unreachable("unexpected error code!"); } @@ -3707,7 +3714,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, Msg); } case Match_MnemonicFail: - return showMatchError(IDLoc, MatchResult); + return showMatchError(IDLoc, MatchResult, Operands); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; @@ -3726,7 +3733,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix()) MatchResult = Match_InvalidSuffix; - return showMatchError(ErrorLoc, MatchResult); + return showMatchError(ErrorLoc, MatchResult, Operands); } case Match_InvalidMemoryIndexed1: case Match_InvalidMemoryIndexed2: @@ -3784,7 +3791,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); + return showMatchError(ErrorLoc, MatchResult, Operands); } } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a7a7daf4b4a5..2bd0cbf9f7c6 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ 
b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -104,8 +104,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case FK_Data_1: return 1; - case FK_Data_2: case AArch64::fixup_aarch64_movw: + case FK_Data_2: + case FK_SecRel_2: return 2; case AArch64::fixup_aarch64_pcrel_branch14: @@ -124,6 +125,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case AArch64::fixup_aarch64_pcrel_branch26: case AArch64::fixup_aarch64_pcrel_call26: case FK_Data_4: + case FK_SecRel_4: return 4; case FK_Data_8: @@ -218,6 +220,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_SecRel_2: + case FK_SecRel_4: return Value; } } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index 7862a03e771c..31762b9e4cd5 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -27,8 +27,7 @@ namespace { class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: AArch64WinCOFFObjectWriter() - : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) { - } + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {} ~AArch64WinCOFFObjectWriter() override = default; @@ -36,19 +35,59 @@ class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; - bool recordRelocation(const MCFixup &) const override; + bool recordRelocation(const MCFixup &) const override; }; } // end anonymous namespace -unsigned -AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx, - const MCValue &Target, - const MCFixup &Fixup, - bool IsCrossSection, - const MCAsmBackend &MAB) const { - const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); - 
report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); +unsigned AArch64WinCOFFObjectWriter::getRelocType( + MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, + bool IsCrossSection, const MCAsmBackend &MAB) const { + auto Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None + : Target.getSymA()->getKind(); + + switch (static_cast(Fixup.getKind())) { + default: { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); + } + + case FK_Data_4: + switch (Modifier) { + default: + return COFF::IMAGE_REL_ARM64_ADDR32; + case MCSymbolRefExpr::VK_COFF_IMGREL32: + return COFF::IMAGE_REL_ARM64_ADDR32NB; + case MCSymbolRefExpr::VK_SECREL: + return COFF::IMAGE_REL_ARM64_SECREL; + } + + case FK_Data_8: + return COFF::IMAGE_REL_ARM64_ADDR64; + + case FK_SecRel_2: + return COFF::IMAGE_REL_ARM64_SECTION; + + case FK_SecRel_4: + return COFF::IMAGE_REL_ARM64_SECREL; + + case AArch64::fixup_aarch64_add_imm12: + return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A; + + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L; + + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + return COFF::IMAGE_REL_ARM64_PAGEBASE_REL21; + + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: + return COFF::IMAGE_REL_ARM64_BRANCH26; + } } bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h index 5a799b2d88d0..568682899be5 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -56,7 +56,7 @@ extern char &AMDGPUMachineCFGStructurizerID; void 
initializeAMDGPUAlwaysInlinePass(PassRegistry&); -ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); +Pass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 7235d8fae332..c68e5861ff25 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -15,8 +15,10 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -26,26 +28,27 @@ using namespace llvm; namespace { -class AMDGPUAnnotateKernelFeatures : public ModulePass { +class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: + const TargetMachine *TM = nullptr; AMDGPUAS AS; - static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); - void addAttrToCallers(Function *Intrin, StringRef AttrName); - bool addAttrsForIntrinsics(Module &M, ArrayRef); + bool addFeatureAttributes(Function &F); public: static char ID; - AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {} - bool runOnModule(Module &M) override; + AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} + + bool doInitialization(CallGraph &CG) override; + bool runOnSCC(CallGraphSCC &SCC) override; StringRef getPassName() const override { return "AMDGPU Annotate Kernel Features"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - ModulePass::getAnalysisUsage(AU); + CallGraphSCCPass::getAnalysisUsage(AU); } static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); @@ -121,16 +124,130 @@ bool 
AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( return false; } -// Return true if an addrspacecast is used that requires the queue ptr. -bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, - AMDGPUAS AS) { +// We do not need to note the x workitem or workgroup id because they are always +// initialized. +// +// TODO: We should not add the attributes if the known compile time workgroup +// size is 1 for y/z. +static StringRef intrinsicToAttrName(Intrinsic::ID ID, + bool &NonKernelOnly, + bool &IsQueuePtr) { + switch (ID) { + case Intrinsic::amdgcn_workitem_id_x: + NonKernelOnly = true; + return "amdgpu-work-item-id-x"; + case Intrinsic::amdgcn_workgroup_id_x: + NonKernelOnly = true; + return "amdgpu-work-group-id-x"; + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::r600_read_tidig_y: + return "amdgpu-work-item-id-y"; + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::r600_read_tidig_z: + return "amdgpu-work-item-id-z"; + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::r600_read_tgid_y: + return "amdgpu-work-group-id-y"; + case Intrinsic::amdgcn_workgroup_id_z: + case Intrinsic::r600_read_tgid_z: + return "amdgpu-work-group-id-z"; + case Intrinsic::amdgcn_dispatch_ptr: + return "amdgpu-dispatch-ptr"; + case Intrinsic::amdgcn_dispatch_id: + return "amdgpu-dispatch-id"; + case Intrinsic::amdgcn_kernarg_segment_ptr: + case Intrinsic::amdgcn_implicitarg_ptr: + return "amdgpu-kernarg-segment-ptr"; + case Intrinsic::amdgcn_queue_ptr: + case Intrinsic::trap: + case Intrinsic::debugtrap: + IsQueuePtr = true; + return "amdgpu-queue-ptr"; + default: + return ""; + } +} + +static bool handleAttr(Function &Parent, const Function &Callee, + StringRef Name) { + if (Callee.hasFnAttribute(Name)) { + Parent.addFnAttr(Name); + return true; + } + + return false; +} + +static void copyFeaturesToFunction(Function &Parent, const Function &Callee, + bool &NeedQueuePtr) { + // X ids unnecessarily propagated to kernels. 
+ static const StringRef AttrNames[] = { + { "amdgpu-work-item-id-x" }, + { "amdgpu-work-item-id-y" }, + { "amdgpu-work-item-id-z" }, + { "amdgpu-work-group-id-x" }, + { "amdgpu-work-group-id-y" }, + { "amdgpu-work-group-id-z" }, + { "amdgpu-dispatch-ptr" }, + { "amdgpu-dispatch-id" }, + { "amdgpu-kernarg-segment-ptr" } + }; + + if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) + NeedQueuePtr = true; + + for (StringRef AttrName : AttrNames) + handleAttr(Parent, Callee, AttrName); +} + +bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { + const AMDGPUSubtarget &ST = TM->getSubtarget(F); + bool HasFlat = ST.hasFlatAddressSpace(); + bool HasApertureRegs = ST.hasApertureRegs(); SmallPtrSet ConstantExprVisited; - for (const BasicBlock &BB : F) { - for (const Instruction &I : BB) { + bool Changed = false; + bool NeedQueuePtr = false; + bool HaveCall = false; + bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + CallSite CS(&I); + if (CS) { + Function *Callee = CS.getCalledFunction(); + + // TODO: Do something with indirect calls. 
+ if (!Callee) { + if (!CS.isInlineAsm()) + HaveCall = true; + continue; + } + + Intrinsic::ID IID = Callee->getIntrinsicID(); + if (IID == Intrinsic::not_intrinsic) { + HaveCall = true; + copyFeaturesToFunction(F, *Callee, NeedQueuePtr); + Changed = true; + } else { + bool NonKernelOnly = false; + StringRef AttrName = intrinsicToAttrName(IID, + NonKernelOnly, NeedQueuePtr); + if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { + F.addFnAttr(AttrName); + Changed = true; + } + } + } + + if (NeedQueuePtr || HasApertureRegs) + continue; + if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { - if (castRequiresQueuePtr(ASC, AS)) - return true; + if (castRequiresQueuePtr(ASC, AS)) { + NeedQueuePtr = true; + continue; + } } for (const Use &U : I.operands()) { @@ -138,100 +255,57 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) - return true; + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { + NeedQueuePtr = true; + break; + } } } } + if (NeedQueuePtr) { + F.addFnAttr("amdgpu-queue-ptr"); + Changed = true; + } + + // TODO: We could refine this to captured pointers that could possibly be + // accessed by flat instructions. For now this is mostly a poor way of + // estimating whether there are calls before argument lowering. 
+ if (HasFlat && !IsFunc && HaveCall) { + F.addFnAttr("amdgpu-flat-scratch"); + Changed = true; + } + + return Changed; +} + +bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { + Module &M = SCC.getCallGraph().getModule(); + Triple TT(M.getTargetTriple()); + + bool Changed = false; + for (CallGraphNode *I : SCC) { + Function *F = I->getFunction(); + if (!F || F->isDeclaration()) + continue; + + Changed |= addFeatureAttributes(*F); + } + + + return Changed; +} + +bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + report_fatal_error("TargetMachine is required"); + + AS = AMDGPU::getAMDGPUAS(CG.getModule()); + TM = &TPC->getTM(); return false; } -void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, - StringRef AttrName) { - SmallPtrSet SeenFuncs; - - for (User *U : Intrin->users()) { - // CallInst is the only valid user for an intrinsic. - CallInst *CI = cast(U); - - Function *CallingFunction = CI->getParent()->getParent(); - if (SeenFuncs.insert(CallingFunction).second) - CallingFunction->addFnAttr(AttrName); - } -} - -bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( - Module &M, - ArrayRef IntrinsicToAttr) { - bool Changed = false; - - for (const StringRef *Arr : IntrinsicToAttr) { - if (Function *Fn = M.getFunction(Arr[0])) { - addAttrToCallers(Fn, Arr[1]); - Changed = true; - } - } - - return Changed; -} - -bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { - Triple TT(M.getTargetTriple()); - AS = AMDGPU::getAMDGPUAS(M); - - static const StringRef IntrinsicToAttr[][2] = { - // .x omitted - { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, - { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, - - { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, - { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, - - { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, - { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, - 
- // .x omitted - { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, - { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } - }; - - static const StringRef HSAIntrinsicToAttr[][2] = { - { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, - { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, - { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }, - { "llvm.trap", "amdgpu-queue-ptr" }, - { "llvm.debugtrap", "amdgpu-queue-ptr" } - }; - - // TODO: We should not add the attributes if the known compile time workgroup - // size is 1 for y/z. - - // TODO: Intrinsics that require queue ptr. - - // We do not need to note the x workitem or workgroup id because they are - // always initialized. - - bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); - if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) { - Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); - - for (Function &F : M) { - if (F.hasFnAttribute("amdgpu-queue-ptr")) - continue; - - auto *TPC = getAnalysisIfAvailable(); - bool HasApertureRegs = TPC && TPC->getTM() - .getSubtarget(F) - .hasApertureRegs(); - if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) - F.addFnAttr("amdgpu-queue-ptr"); - } - } - - return Changed; -} - -ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { +Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { return new AMDGPUAnnotateKernelFeatures(); } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 83ad1a5c6ee3..2247814cfe55 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -268,19 +268,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF)); - OutStreamer->emitRawComment(" codeLenInByte = " + - Twine(getFunctionCodeSize(MF)), false); - OutStreamer->emitRawComment( - " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false); - 
OutStreamer->emitRawComment( - " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false); - OutStreamer->emitRawComment( " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); OutStreamer->emitRawComment( " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false); - OutStreamer->emitRawComment( - " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false); OutStreamer->emitRawComment( " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) + " bytes/workgroup (compile time only)", false); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2553cf4da0fe..258b1737deb3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -573,6 +573,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FABS); + setTargetDAGCombine(ISD::AssertZext); + setTargetDAGCombine(ISD::AssertSext); } //===----------------------------------------------------------------------===// @@ -883,7 +885,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting /// input values across multiple registers. Each item in the Ins array -/// represents a single value that will be stored in regsters. Ins[x].VT is +/// represents a single value that will be stored in registers. Ins[x].VT is /// the value type of the value that will be stored in the register, so /// whatever SDNode we lower the argument to needs to be this type. /// @@ -2591,6 +2593,31 @@ SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, return SDValue(CSrc, 0); } +// FIXME: This should go in generic DAG combiner with an isTruncateFree check, +// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU +// issues. 
+SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + + // (vt2 (assertzext (truncate vt0:x), vt1)) -> + // (vt2 (truncate (assertzext vt0:x, vt1))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue N1 = N->getOperand(1); + EVT ExtVT = cast(N1)->getVT(); + SDLoc SL(N); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.bitsGE(ExtVT)) { + SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); + return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); + } + } + + return SDValue(); +} /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -3521,6 +3548,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, break; } + case ISD::AssertZext: + case ISD::AssertSext: + return performAssertSZExtCombine(N, DCI); } return SDValue(); } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index a45234e2b39f..d85aada6053a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -76,6 +76,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 1bc5a52053ec..779617629010 100644 --- 
a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -277,7 +277,7 @@ std::pair AMDGPUSubtarget::getWavesPerEU( // Make sure requested values are compatible with values implied by requested // minimum/maximum flat work group sizes. if (RequestedFlatWorkGroupSize && - Requested.first > MinImpliedByFlatWorkGroupSize) + Requested.first < MinImpliedByFlatWorkGroupSize) return Default; return Requested; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 22cede59086a..d4b6a5fe8020 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -359,6 +359,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return FP64FP16Denormals; } + bool supportsMinMaxDenormModes() const { + return getGeneration() >= AMDGPUSubtarget::GFX9; + } + bool hasFPExceptions() const { return FPExceptions; } diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e3c90f250600..b37c274102bc 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1208,7 +1208,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { } bool AMDGPUOperand::isLiteralImm(MVT type) const { - // Check that this imediate can be added as literal + // Check that this immediate can be added as literal if (!isImmTy(ImmTyNone)) { return false; } diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index f26e49295e69..966c6fec20c6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -87,6 +87,7 @@ DECODE_OPERAND(Decode##RegClass##RegisterClass, 
decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) +DECODE_OPERAND_REG(VS_128) DECODE_OPERAND_REG(VReg_64) DECODE_OPERAND_REG(VReg_96) @@ -318,6 +319,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const { + return decodeSrcOp(OPW128, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const { return decodeSrcOp(OPW16, Val); } diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 3d71db909e20..4c755be09999 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -70,6 +70,7 @@ class AMDGPUDisassembler : public MCDisassembler { MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; + MCOperand decodeOperand_VS_128(unsigned Val) const; MCOperand decodeOperand_VSrc16(unsigned Val) const; MCOperand decodeOperand_VSrcV216(unsigned Val) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 3af242d9ea66..0aad8f0843d6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -653,6 +653,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, // again. The same constant folded instruction could also have a second // use operand. 
NextUse = MRI->use_begin(Dst.getReg()); + FoldList.clear(); continue; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 08a64de38501..7334781916d8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -158,7 +158,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // No replacement necessary. if (ScratchWaveOffsetReg == AMDGPU::NoRegister || !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { - assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister); + assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG); return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); } @@ -246,13 +246,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, // this point it appears we need the setup. This part of the prolog should be // emitted after frame indices are eliminated. - if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit()) + if (MFI->hasFlatScratchInit()) emitFlatScratchInit(ST, MF, MBB); unsigned SPReg = MFI->getStackPtrOffsetReg(); - if (SPReg != AMDGPU::NoRegister) { + if (SPReg != AMDGPU::SP_REG) { + assert(MRI.isReserved(SPReg) && "SPReg used but not reserved"); + DebugLoc DL; - int64_t StackSize = MF.getFrameInfo().getStackSize(); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + int64_t StackSize = FrameInfo.getStackSize(); if (StackSize == 0) { BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2ba570b9ebbb..2356405f0919 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1171,8 +1171,7 @@ static void allocateSystemSGPRs(CCState &CCInfo, static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, - SIMachineFunctionInfo 
&Info, - bool NeedSP) { + SIMachineFunctionInfo &Info) { // Now that we've figured out where the scratch register inputs are, see if // should reserve the arguments and use them directly. MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -1234,15 +1233,6 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, Info.setScratchWaveOffsetReg(ReservedOffsetReg); } } - - if (NeedSP) { - unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF); - Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg); - - assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg()); - assert(!TRI.isSubRegister(Info.getScratchRSrcReg(), - Info.getStackPtrOffsetReg())); - } } SDValue SITargetLowering::LowerFormalArguments( @@ -1380,10 +1370,37 @@ SDValue SITargetLowering::LowerFormalArguments( unsigned Reg = VA.getLocReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + EVT ValVT = VA.getValVT(); Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. 
+ switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); + break; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, + DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + if (IsShader && Arg.VT.isVector()) { // Build a vector from the registers Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); @@ -1410,25 +1427,13 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Val); } - const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - - // TODO: Could maybe omit SP if only tail calls? - bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects(); - // Start adding system SGPRs. if (IsEntryFunc) { allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader); - reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP); } else { CCInfo.AllocateReg(Info->getScratchRSrcReg()); CCInfo.AllocateReg(Info->getScratchWaveOffsetReg()); CCInfo.AllocateReg(Info->getFrameOffsetReg()); - - if (NeedSP) { - unsigned StackPtrReg = findFirstFreeSGPR(CCInfo); - CCInfo.AllocateReg(StackPtrReg); - Info->setStackPtrOffsetReg(StackPtrReg); - } } return Chains.empty() ? Chain : @@ -4624,8 +4629,8 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { return DAG.isKnownNeverNaN(Op); } -static bool isCanonicalized(SDValue Op, const SISubtarget *ST, - unsigned MaxDepth=5) { +static bool isCanonicalized(SelectionDAG &DAG, SDValue Op, + const SISubtarget *ST, unsigned MaxDepth=5) { // If source is a result of another standard FP operation it is already in // canonical form. 
@@ -4663,7 +4668,7 @@ static bool isCanonicalized(SDValue Op, const SISubtarget *ST, case ISD::FNEG: case ISD::FABS: return (MaxDepth > 0) && - isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1); + isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1); case ISD::FSIN: case ISD::FCOS: @@ -4672,16 +4677,19 @@ static bool isCanonicalized(SDValue Op, const SISubtarget *ST, // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. // For such targets need to check their input recursively. - // TODO: on GFX9+ we could return true without checking provided no-nan - // mode, since canonicalization is also used to quiet sNaNs. case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINNAN: case ISD::FMAXNAN: + if (ST->supportsMinMaxDenormModes() && + DAG.isKnownNeverNaN(Op.getOperand(0)) && + DAG.isKnownNeverNaN(Op.getOperand(1))) + return true; + return (MaxDepth > 0) && - isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) && - isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1); + isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) && + isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1); case ISD::ConstantFP: { auto F = cast(Op)->getValueAPF(); @@ -4700,11 +4708,19 @@ SDValue SITargetLowering::performFCanonicalizeCombine( if (!CFP) { SDValue N0 = N->getOperand(0); + EVT VT = N0.getValueType().getScalarType(); + auto ST = getSubtarget(); + + if (((VT == MVT::f32 && ST->hasFP32Denormals()) || + (VT == MVT::f64 && ST->hasFP64Denormals()) || + (VT == MVT::f16 && ST->hasFP16Denormals())) && + DAG.isKnownNeverNaN(N0)) + return N0; bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction()); if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) && - isCanonicalized(N0, getSubtarget())) + isCanonicalized(DAG, N0, ST)) return N0; return SDValue(); @@ -5813,3 +5829,44 @@ SITargetLowering::getConstraintType(StringRef Constraint) const { } return TargetLowering::getConstraintType(Constraint); } + +// Figure out which registers should be reserved for stack access. 
Only after +// the function is legalized do we know all of the non-spill stack objects or if +// calls are present. +void SITargetLowering::finalizeLowering(MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *Info = MF.getInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SISubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + if (Info->isEntryFunction()) { + // Callable functions have fixed registers used for stack access. + reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info); + } + + // We have to assume the SP is needed in case there are calls in the function + // during lowering. Calls are only detected after the function is + // lowered. We're about to reserve registers, so don't bother using it if we + // aren't really going to use it. + bool NeedSP = !Info->isEntryFunction() || + MFI.hasVarSizedObjects() || + MFI.hasCalls(); + + if (NeedSP) { + unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF); + Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg); + + assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg()); + assert(!TRI->isSubRegister(Info->getScratchRSrcReg(), + Info->getStackPtrOffsetReg())); + MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg()); + } + + MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg()); + MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg()); + MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG, + Info->getScratchWaveOffsetReg()); + + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h index 83392a7ab1b2..e6bb3d6cd419 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -232,6 +232,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { ConstraintType 
getConstraintType(StringRef Constraint) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const; + + void finalizeLowering(MachineFunction &MF) const override; }; } // End namespace llvm diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 160f8837d49c..a7e0feb10b9f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3408,8 +3408,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { - SmallVector Worklist; - Worklist.push_back(&TopInst); + SetVectorType Worklist; + Worklist.insert(&TopInst); while (!Worklist.empty()) { MachineInstr &Inst = *Worklist.pop_back_val(); @@ -3610,7 +3610,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } } -void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, +void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3635,7 +3635,7 @@ void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, } void SIInstrInfo::splitScalar64BitUnaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3686,7 +3686,7 @@ void SIInstrInfo::splitScalar64BitUnaryOp( } void SIInstrInfo::splitScalar64BitBinaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3753,7 +3753,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp( } void SIInstrInfo::splitScalar64BitBCNT( - SmallVectorImpl &Worklist, MachineInstr &Inst) 
const { + SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3789,7 +3789,7 @@ void SIInstrInfo::splitScalar64BitBCNT( addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, +void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3853,12 +3853,12 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, void SIInstrInfo::addUsersToMoveToVALUWorklist( unsigned DstReg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const { + SetVectorType &Worklist) const { for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), E = MRI.use_end(); I != E;) { MachineInstr &UseMI = *I->getParent(); if (!canReadVGPR(UseMI, I.getOperandNo())) { - Worklist.push_back(&UseMI); + Worklist.insert(&UseMI); do { ++I; @@ -3869,7 +3869,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( } } -void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, +void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -3932,7 +3932,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, } void SIInstrInfo::addSCCDefUsersToVALUWorklist( - MachineInstr &SCCDefInst, SmallVectorImpl &Worklist) const { + MachineInstr &SCCDefInst, SetVectorType &Worklist) const { // This assumes that all the users of SCC are in the same block // as the SCC def. 
for (MachineInstr &MI : @@ -3943,7 +3943,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist( return; if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) - Worklist.push_back(&MI); + Worklist.insert(&MI); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index d00c0d4a7f4e..3dd5bc89e6c7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -19,6 +19,7 @@ #include "AMDGPUInstrInfo.h" #include "SIDefines.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/SetVector.h" namespace llvm { @@ -38,6 +39,8 @@ class SIInstrInfo final : public AMDGPUInstrInfo { EXECZ = 3 }; + typedef SmallSetVector SetVectorType; + static unsigned getBranchOpcode(BranchPredicate Cond); static BranchPredicate getBranchPredicate(unsigned Opcode); @@ -56,30 +59,30 @@ class SIInstrInfo final : public AMDGPUInstrInfo { void swapOperands(MachineInstr &Inst) const; - void lowerScalarAbs(SmallVectorImpl &Worklist, + void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitUnaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitUnaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBinaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBCNT(SmallVectorImpl &Worklist, + void splitScalar64BitBCNT(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitBFE(SmallVectorImpl &Worklist, + void splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const; - void movePackToVALU(SmallVectorImpl &Worklist, + void movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const; void addUsersToMoveToVALUWorklist( unsigned Reg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; void 
addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ffb01363e131..088173680fa8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1436,7 +1436,7 @@ class VOPProfile _ArgVT> { field bit IsPacked = isPackedType.ret; field bit HasOpSel = IsPacked; - field bit HasOMod = !if(HasOpSel, 0, HasModifiers); + field bit HasOMod = !if(HasOpSel, 0, isFloatType.ret); field bit HasSDWAOMod = isFloatType.ret; field bit HasExt = getHasExt.ret; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index bcc685015cf5..ba69e42d9125 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1060,7 +1060,7 @@ def : Pat < class FPToI1Pat : Pat < (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), - (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) + (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE)) >; def : FPToI1Pat; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 3203c38dae34..a7c8166ff6d2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -23,10 +23,10 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), TIDReg(AMDGPU::NoRegister), - ScratchRSrcReg(AMDGPU::NoRegister), - ScratchWaveOffsetReg(AMDGPU::NoRegister), - FrameOffsetReg(AMDGPU::NoRegister), - StackPtrOffsetReg(AMDGPU::NoRegister), + 
ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG), + ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG), + FrameOffsetReg(AMDGPU::FP_REG), + StackPtrOffsetReg(AMDGPU::SP_REG), PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), DispatchPtrUserSGPR(AMDGPU::NoRegister), QueuePtrUserSGPR(AMDGPU::NoRegister), @@ -42,6 +42,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), + WorkItemIDXVGPR(AMDGPU::NoRegister), + WorkItemIDYVGPR(AMDGPU::NoRegister), + WorkItemIDZVGPR(AMDGPU::NoRegister), PSInputAddr(0), PSInputEnable(0), ReturnsVoid(true), @@ -87,12 +90,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ScratchWaveOffsetReg = AMDGPU::SGPR4; FrameOffsetReg = AMDGPU::SGPR5; StackPtrOffsetReg = AMDGPU::SGPR32; - return; + + // FIXME: Not really a system SGPR. + PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg; } CallingConv::ID CC = F->getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { - KernargSegmentPtr = true; + KernargSegmentPtr = !F->arg_empty(); WorkGroupIDX = true; WorkItemIDX = true; } else if (CC == CallingConv::AMDGPU_PS) { @@ -101,17 +106,25 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (ST.debuggerEmitPrologue()) { // Enable everything. 
+ WorkGroupIDX = true; WorkGroupIDY = true; WorkGroupIDZ = true; + WorkItemIDX = true; WorkItemIDY = true; WorkItemIDZ = true; } else { + if (F->hasFnAttribute("amdgpu-work-group-id-x")) + WorkGroupIDX = true; + if (F->hasFnAttribute("amdgpu-work-group-id-y")) WorkGroupIDY = true; if (F->hasFnAttribute("amdgpu-work-group-id-z")) WorkGroupIDZ = true; + if (F->hasFnAttribute("amdgpu-work-item-id-x")) + WorkItemIDX = true; + if (F->hasFnAttribute("amdgpu-work-item-id-y")) WorkItemIDY = true; @@ -119,25 +132,28 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDZ = true; } - // X, XY, and XYZ are the only supported combinations, so make sure Y is - // enabled if Z is. - if (WorkItemIDZ) - WorkItemIDY = true; - const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); bool MaySpill = ST.isVGPRSpillingEnabled(*F); - bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls(); + bool HasStackObjects = FrameInfo.hasStackObjects(); - if (HasStackObjects || MaySpill) { - PrivateSegmentWaveByteOffset = true; + if (isEntryFunction()) { + // X, XY, and XYZ are the only supported combinations, so make sure Y is + // enabled if Z is. + if (WorkItemIDZ) + WorkItemIDY = true; - // HS and GS always have the scratch wave offset in SGPR5 on GFX9. - if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && - (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) - PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + if (HasStackObjects || MaySpill) { + PrivateSegmentWaveByteOffset = true; + + // HS and GS always have the scratch wave offset in SGPR5 on GFX9. 
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && + (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) + PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + } } - if (ST.isAmdCodeObjectV2(MF)) { + bool IsCOV2 = ST.isAmdCodeObjectV2(MF); + if (IsCOV2) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -154,11 +170,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitBufferPtr = true; } - // We don't need to worry about accessing spills with flat instructions. - // TODO: On VI where we must use flat for global, we should be able to omit - // this if it is never used for generic access. - if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS()) - FlatScratchInit = true; + if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr")) + KernargSegmentPtr = true; + + if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) { + // TODO: This could be refined a lot. The attribute is a poor way of + // detecting calls that may require it before argument lowering. + if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) + FlatScratchInit = true; + } } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 05aa249584bf..4c7f38a09a48 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -119,6 +119,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned WorkGroupInfoSystemSGPR; unsigned PrivateSegmentWaveByteOffsetSystemSGPR; + // VGPR inputs. These are always v0, v1 and v2 for entry functions. + unsigned WorkItemIDXVGPR; + unsigned WorkItemIDYVGPR; + unsigned WorkItemIDZVGPR; + // Graphics info. 
unsigned PSInputAddr; unsigned PSInputEnable; @@ -377,10 +382,13 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { } void setStackPtrOffsetReg(unsigned Reg) { - assert(Reg != AMDGPU::NoRegister && "Should never be unset"); StackPtrOffsetReg = Reg; } + // Note the unset value for this is AMDGPU::SP_REG rather than + // NoRegister. This is mostly a workaround for MIR tests where state that + // can't be directly computed from the function is not preserved in serialized + // MIR. unsigned getStackPtrOffsetReg() const { return StackPtrOffsetReg; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ef6ad4ad0c8f..4a3fbb4593bb 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -207,7 +207,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); } + // We have to assume the SP is needed in case there are calls in the function, + // which is detected after the function is lowered. If we aren't really going + // to need SP, don't bother reserving it. unsigned StackPtrReg = MFI->getStackPtrOffsetReg(); + if (StackPtrReg != AMDGPU::NoRegister) { reserveRegisterTuples(Reserved, StackPtrReg); assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index fc808011cd88..54ea7805e18d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -23,6 +23,13 @@ class SIReg regIdx = 0> : Register, def VCC_LO : SIReg<"vcc_lo", 106>; def VCC_HI : SIReg<"vcc_hi", 107>; +// Pseudo-registers: Used as placeholders during isel and immediately +// replaced, never seeing the verifier. 
+def PRIVATE_RSRC_REG : SIReg<"", 0>; +def FP_REG : SIReg<"", 0>; +def SP_REG : SIReg<"", 0>; +def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>; + // VCC for 64-bit instructions def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, DwarfRegAlias { @@ -267,7 +274,8 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, - SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> { + SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, + FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> { let AllocationPriority = 7; } @@ -314,7 +322,8 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128R let isAllocatable = 0; } -def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> { +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, + (add SGPR_128, TTMP_128)> { let AllocationPriority = 10; } @@ -464,7 +473,9 @@ defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; defm VSrc : RegImmOperand<"VS", "VSrc">; -def VSrc_128 : RegisterOperand; +def VSrc_128 : RegisterOperand { + let DecoderMethod = "DecodeVS_128RegisterClass"; +} //===----------------------------------------------------------------------===// // VSrc_* Operands with an VGPR diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 26515b27bb77..67ad904ca972 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -539,23 +539,9 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { } bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { - - if (Reg0 == Reg1) { - return true; + for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) { + if (*R == Reg1) return true; } 
- - unsigned SubReg0 = TRI->getSubReg(Reg0, 1); - if (SubReg0 == 0) { - return TRI->getSubRegIndex(Reg1, Reg0) > 0; - } - - for (unsigned Idx = 2; SubReg0 > 0; ++Idx) { - if (isRegIntersect(Reg1, SubReg0, TRI)) { - return true; - } - SubReg0 = TRI->getSubReg(Reg0, Idx); - } - return false; } diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 7b9bc71ad4c7..d5acb49b4f39 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -117,7 +117,10 @@ class VOP2_SDWA_Pseudo pattern=[]> : class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT + !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); } @@ -813,9 +816,11 @@ let SubtargetPredicate = isVI in { // Aliases to simplify matching of floating-point instructions that // are VOP2 on SI and VOP3 on VI. 
-class SI2_VI3Alias : InstAlias < +class SI2_VI3Alias : InstAlias < name#" $dst, $src0, $src1", - (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0) + !if(inst.Pfl.HasOMod, + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) >, PredicateControl { let UseInstAsmMatchConverter = 0; let AsmVariantName = AMDGPUAsmVariants.VOP3; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td index a8ca593f14ed..92ed0706dc01 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -12,17 +12,21 @@ //===----------------------------------------------------------------------===// class getVOP3ModPat { + dag src0 = !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); + list ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))]; + (node (P.Src0VT src0)))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -92,6 +96,7 @@ class VOP3_Profile : VOPProfile { class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { // v_div_scale_{f32|f64} do not support input modifiers. 
let HasModifiers = 0; + let HasOMod = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index f2de1f995726..3becf758aaa3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -34,6 +34,9 @@ class VOP3_VOP3PInst, fma>; +def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile>; +def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile>; + def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile, fadd>; def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile, fmul>; def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile, fmaxnum>; @@ -41,7 +44,6 @@ def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile, add>; def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile>; -def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile, mul>; def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile, smin>; @@ -50,6 +52,9 @@ def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile, umax>; } +def V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile>; +def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; + def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile, lshl_rev>; def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile, ashr_rev>; def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile, lshr_rev>; @@ -71,6 +76,7 @@ multiclass VOP3P_Real_vi op> { } } +defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>; @@ -79,8 +85,10 @@ defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>; defm 
V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>; defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>; defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>; +defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>; defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>; +defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>; defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>; defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>; defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td index f3482a22d5dc..b636fc9be431 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -148,6 +148,19 @@ class VOPCInstAlias : let SubtargetPredicate = AssemblerPredicate; } +class getVOPCPat64 : LetDummies { + list ret = !if(P.HasModifiers, + [(set i1:$sdst, + (setcc (P.Src0VT + !if(P.HasOMod, + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + cond))], + [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]); +} + + multiclass VOPC_Pseudos , + def _e64 : VOP3_Pseudo.ret>, Commutable_REV { let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; @@ -634,7 +640,7 @@ class FCMP_Pattern : Pat < (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), (inst $src0_modifiers, $src0, $src1_modifiers, $src1, - DSTCLAMP.NONE, DSTOMOD.NONE) + DSTCLAMP.NONE) >; def : FCMP_Pattern ; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td index 77b7952b22a8..b47538ba0349 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -136,6 +136,8 @@ class VOP3_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + + 
VOPProfile Pfl = ps.Pfl; } // XXX - Is there any reason to distingusih this from regular VOP3 diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index c40b4450a5b5..e49c1babac21 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -16,145 +16,173 @@ include "llvm/Target/Target.td" -//===----------------------------------------------------------------------===// -// ARM Helper classes. -// - -class ProcNoItin Features> - : Processor; - -class Architecture features > - : SubtargetFeature; - //===----------------------------------------------------------------------===// // ARM Subtarget state. // -def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", - "Thumb mode">; +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", + "true", "Thumb mode">; + +def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", + "true", "Use software floating " + "point features.">; -def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", - "Use software floating point features.">; //===----------------------------------------------------------------------===// // ARM Subtarget features. 
// -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; -def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable NEON instructions", - [FeatureVFP3]>; -def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", - "Enable Thumb2 instructions">; -def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution", - [ModeThumb]>; -def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", - "Enable half-precision floating point">; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; -def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", - "Enable full half-precision floating point", - [FeatureFPARMv8]>; -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; -def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", - "true", - "Enable divide instructions in Thumb">; -def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasHardwareDivideInARM", "true", - "Enable divide instructions in ARM mode">; -def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", - "Has data barrier (dmb / dsb) instructions">; -def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", - "Has v7 clrex instruction">; +// Floating Point, HW Division and Neon Support +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions">; + +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; + +def 
FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision " + "floating point">; + +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8]>; + +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports " + "single precision only">; + +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict FP to 16 double registers">; + +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasHardwareDivideInThumb", "true", + "Enable divide instructions in Thumb">; + +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; + def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", - "Has v8 acquire/release (lda/ldaex etc) instructions">; -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", - "FP compare + branch is slow">; -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports single precision only">; -def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", - "Enable support for Performance Monitor extensions">; -def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", - "Enable support for TrustZone security extensions">; -def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", - "Enable support 
for ARMv8-M Security Extensions">; -def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", - "Enable support for Cryptography extensions", - [FeatureNEON]>; -def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", - "Enable support for CRC instructions">; + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security Extensions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + + // Not to be confused with FeatureHasRetAddrStack (return address stack) -def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", - "Enable Reliability, Availability and Serviceability extensions">; -def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", - "Enable fast computation of positive address offsets">; -def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto operations">; +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; -// Cyclone has preferred instructions for zeroing VFP registers, which can -// execute in 0 cycles. 
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; +// Fast computation of non-negative address offsets +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; -// Whether or not it may be profitable to unpredicate certain instructions -// during if conversion. +// Fast execution of AES crypto operations +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +// Cyclone can zero VFP registers in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +// Whether it is profitable to unpredicate certain instructions during if-conversion def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", - "IsProfitableToUnpredicate", - "true", + "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. -def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", - "HasSlowVGETLNi32", "true", - "Has slow VGETLNi32 - prefer VMOV">; +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. -def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", - "Has slow VDUP32 - prefer VMOV">; +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. 
-def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", - "true", "Prefer VMOVSR">; +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker // than ISH def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", - "true", "Prefer ISHST barriers">; + "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", - "Has muxed AGU and NEON/FPU">; +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; -// On some targets, a VLDM/VSTM starting with an odd register number needs more -// microops than single VLDRS. +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", - "true", "VLDM/VSTM starting with an odd register is slow">; + "true", "VLDM/VSTM starting " + "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", "SlowLoadDSubregister", "true", "Loading into D subregs is slow">; + // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", - "Expand VFP/NEON MLA/MLS instructions">; +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. 
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", @@ -162,15 +190,18 @@ def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. -def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", - "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">; +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. -def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", - "true", - "Use NEON for single precision FP">; +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. @@ -181,18 +212,18 @@ def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", // Some processors have a nonpipelined VFP coprocessor. def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", - "VFP instructions are not pipelined">; + "VFP instructions are not pipelined">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. 
-def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", - "Disable VFP / NEON MAC instructions">; +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", - "HasVMLxForwarding", "true", - "Has multiplier accumulator forwarding">; + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", @@ -213,14 +244,16 @@ def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", "true", "Disable +1 predication cost for instructions updating CPSR">; -def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", - "AvoidMOVsShifterOperand", "true", - "Avoid movs instructions with shifter operand">; +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. -def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true", - "Has return address stack">; +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; // Some processors have no branch predictor, which changes the expected cost of // taking a branch which affects the choice of whether to use predicated @@ -230,63 +263,80 @@ def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", "Has no branch predictor">; /// DSP extension. 
-def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", - "Supports DSP instructions in ARM and/or Thumb2">; +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; // Multiprocessing extension. -def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", - "Supports Multiprocessing extension">; +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; // Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). def FeatureVirtualization : SubtargetFeature<"virtualization", - "HasVirtualization", "true", - "Supports Virtualization extension", - [FeatureHWDivThumb, FeatureHWDivARM]>; + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; -// M-series ISA -def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", - "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. 
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; -// R-series ISA -def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", - "Is realtime profile ('R' series)">; +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + + +//===----------------------------------------------------------------------===// +// ARM architecture class +// // A-series ISA def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", "Is application profile ('A' series)">; -// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. -// See ARMInstrInfo.td for details. 
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", - "NaCl trap">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; -def FeatureStrictAlign : SubtargetFeature<"strict-align", - "StrictAlign", "true", - "Disallow all unaligned memory " - "access">; +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; -def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", - "Generate calls via indirect call " - "instructions">; -def FeatureExecuteOnly - : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", - "Enable the generation of execute only code.">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable as " - "GPR">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution", + [ModeThumb]>; -def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", - "Don't use movt/movw pairs for 32-bit " - "imms">; - -def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", - "NegativeImmediates", "false", - "Convert immediates and instructions " - "to their negated or complemented " - "equivalent when the immediate does " - "not fit in the encoding.">; //===----------------------------------------------------------------------===// // ARM ISAa. 
@@ -294,43 +344,57 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; + def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", "Support ARM v5T instructions", [HasV4TOps]>; + def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", - "Support ARM v5TE, v5TEj, and v5TExp instructions", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", [HasV5TOps]>; + def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", "Support ARM v6 instructions", [HasV5TEOps]>; + def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; + def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", "Support ARM v8M Baseline instructions", [HasV6MOps]>; + def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", "Support ARM v6k instructions", [HasV6Ops]>; + def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon, FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureAcquireRelease]>; + def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops]>; -def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps]>; -def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", - "Support ARM v8M Mainline instructions", - [HasV7Ops]>; 
//===----------------------------------------------------------------------===// @@ -386,11 +450,17 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", "Cortex-M3 ARM processors", []>; + //===----------------------------------------------------------------------===// -// ARM schedules. +// ARM Helper classes. // -include "ARMSchedule.td" +class Architecture features> + : SubtargetFeature; + +class ProcNoItin Features> + : Processor; //===----------------------------------------------------------------------===// @@ -546,6 +616,12 @@ def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; +//===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMSchedule.td" + //===----------------------------------------------------------------------===// // ARM processors // @@ -553,6 +629,9 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; // Dummy CPU, used to target architectures def : ProcessorModel<"generic", CortexA8Model, []>; +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. 
+ def : ProcNoItin<"arm8", [ARMv4]>; def : ProcNoItin<"arm810", [ARMv4]>; def : ProcNoItin<"strongarm", [ARMv4]>; @@ -612,7 +691,6 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, FeatureVFP2, FeatureHasSlowFPVMLx]>; -// FIXME: A5 has currently the same Schedule model as A8 def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, FeatureHasRetAddrStack, FeatureTrustZone, @@ -656,7 +734,6 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, FeatureCheckVLDnAlign, FeatureMP]>; -// FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureHasRetAddrStack, FeatureTrustZone, @@ -666,7 +743,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureVirtualization, FeatureMP]>; -// FIXME: A15 has currently the same Schedule model as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureDontWidenVMOVS, FeatureHasRetAddrStack, @@ -678,7 +754,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: A17 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureHasRetAddrStack, FeatureTrustZone, @@ -688,9 +763,7 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureAvoidPartialCPSR, FeatureVirtualization]>; -// FIXME: krait has currently the same Schedule model as A9 -// FIXME: krait has currently the same features as A9 plus VFP4 and hardware -// division features. +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureHasRetAddrStack, FeatureMuxedUnits, @@ -720,12 +793,10 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureSlowVGETLNi32, FeatureSlowVDUP32]>; -// FIXME: R4 has currently the same ProcessorModel as A8. 
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureAvoidPartialCPSR]>; -// FIXME: R4F has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureSlowFPBrcc, @@ -734,7 +805,6 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureD16, FeatureAvoidPartialCPSR]>; -// FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasRetAddrStack, FeatureVFP3, @@ -744,7 +814,6 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureHasRetAddrStack, FeatureVFP3, @@ -814,14 +883,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, FeatureCRC, FeatureFPAO]>; -def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO, - FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureHWDivThumb, @@ -835,7 +904,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureCrypto, FeatureCRC]>; -// Cyclone is very similar to swift def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, @@ -881,9 +949,7 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, //===----------------------------------------------------------------------===// include "ARMRegisterInfo.td" - include "ARMRegisterBanks.td" - include "ARMCallingConv.td" 
//===----------------------------------------------------------------------===// @@ -891,7 +957,6 @@ include "ARMCallingConv.td" //===----------------------------------------------------------------------===// include "ARMInstrInfo.td" - def ARMInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// @@ -912,7 +977,7 @@ def ARMAsmParserVariant : AsmParserVariant { } def ARM : Target { - // Pull in Instruction Info: + // Pull in Instruction Info. let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; let AssemblyParserVariants = [ARMAsmParserVariant]; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e97a7ce5067f..370c0a7f5c53 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -117,7 +117,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const ARMSubtarget &STI = MF.getSubtarget(); if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && @@ -163,7 +163,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, // both or otherwise does not want to enable this optimization, the function // should return NULL if (CC == CallingConv::GHC) - // This is academic becase all GHC calls are (supposed to be) tail calls + // This is academic because all GHC calls are (supposed to be) tail calls return nullptr; return STI.isTargetDarwin() ? 
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 384f80356cc8..bf00ef61c2d1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -250,8 +250,7 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return false; // Look to see if our OptionalDef is defining CPSR or CCR. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; if (MO.getReg() == ARM::CPSR) *CPSR = true; @@ -267,8 +266,8 @@ bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { AFI->isThumb2Function()) return MI->isPredicable(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) - if (MCID.OpInfo[i].isPredicate()) + for (const MCOperandInfo &opInfo : MCID.operands()) + if (opInfo.isPredicate()) return true; return false; @@ -1972,7 +1971,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, break; } case CCValAssign::AExt: - // Intentional fall-through. Handle AExt and ZExt. + // Intentional fall-through. Handle AExt and ZExt. 
case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); @@ -2001,6 +2000,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, assert(VA.getLocVT() == MVT::f64 && "Custom lowering for v2f64 args not available"); + // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size() CCValAssign &NextVA = ArgLocs[++i]; assert(VA.isRegLoc() && NextVA.isRegLoc() && @@ -2172,8 +2172,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc)); AddOptionalDefs(MIB); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned R : RetRegs) + MIB.addReg(R, RegState::Implicit); return true; } @@ -2233,8 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { ArgRegs.reserve(I->getNumOperands()); ArgVTs.reserve(I->getNumOperands()); ArgFlags.reserve(I->getNumOperands()); - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { unsigned Arg = getRegForValue(Op); if (Arg == 0) return false; @@ -2278,8 +2277,8 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2423,8 +2422,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. 
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); + for (unsigned R : RegArgs) + MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2932,13 +2931,12 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, bool Found = false; bool isZExt; - for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends); - i != e; ++i) { - if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() && - (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm && - MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) { + for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) { + if (FLE.Opc[isThumb2] == MI->getOpcode() && + (uint64_t)FLE.ExpectedImm == Imm && + MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) { Found = true; - isZExt = FoldableLoadExtends[i].isZExt; + isZExt = FLE.isZExt; } } if (!Found) return false; @@ -3057,9 +3055,8 @@ bool ARMFastISel::fastLowerArguments() { }; const TargetRegisterClass *RC = &ARM::rGPRRegClass; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { - unsigned ArgNo = I->getArgNo(); + for (const Argument &Arg : F->args()) { + unsigned ArgNo = Arg.getArgNo(); unsigned SrcReg = GPRArgRegs[ArgNo]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 
@@ -3069,7 +3066,7 @@ bool ARMFastISel::fastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - updateValueMap(&*I, ResultReg); + updateValueMap(&Arg, ResultReg); } return true; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7f9fe55a5c38..f75dd4de3f96 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2682,9 +2682,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb()) { - SDValue Pred = getAL(CurDAG, dl); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; + SDValue Ops[] = { + CPIdx, + getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, Ops); } else { @@ -2698,6 +2701,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) { ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, Ops); } + // Annotate the Node with memory operand information so that MachineInstr + // queries work properly. This e.g. gives the register allocation the + // required information for rematerialization. 
+ MachineFunction& MF = CurDAG->getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = MF.getMachineMemOperand( + MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, 4, 4); + + cast(ResNode)->setMemRefs(MemOp, MemOp+1); + ReplaceNode(N, ResNode); return; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 29ef69ad0010..faed6b867e2b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -722,6 +722,29 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return false; break; } + case G_BRCOND: { + if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) { + DEBUG(dbgs() << "Unsupported condition register for G_BRCOND"); + return false; + } + + // Set the flags. + auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri)) + .addReg(I.getOperand(0).getReg()) + .addImm(1) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI)) + return false; + + // Branch conditionally. 
+ auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) + .add(I.getOperand(1)) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) + return false; + I.eraseFromParent(); + return true; + } default: return false; } diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index f23e62595d2e..1c17c07e4cb0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -66,14 +66,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Libcall); } - // FIXME: Support s8 and s16 as well - for (unsigned Op : {G_SREM, G_UREM}) + for (unsigned Op : {G_SREM, G_UREM}) { + for (auto Ty : {s8, s16}) + setAction({Op, Ty}, WidenScalar); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); else if (AEABI(ST)) setAction({Op, s32}, Custom); else setAction({Op, s32}, Libcall); + } for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); @@ -88,6 +90,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_SELECT, p0}, Legal); setAction({G_SELECT, 1, s1}, Legal); + setAction({G_BRCOND, s1}, Legal); + setAction({G_CONSTANT, s32}, Legal); for (auto Ty : {s1, s8, s16}) setAction({G_CONSTANT, Ty}, WidenScalar); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 13acea3c28a9..48b02d40b246 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -153,9 +153,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, break; } - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; if (AP.lowerOperand(MO, MCOp)) { if (MCOp.isImm() && EncodeImms) { diff --git 
a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index c0c09e8c15af..844930235894 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -259,6 +259,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; case G_SELECT: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); (void)Ty2; assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); @@ -282,6 +283,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case G_FCMP: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + (void)Ty; LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); (void)Ty2; @@ -329,6 +331,13 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &ARM::ValueMappings[ARM::DPR3OpsIdx]}); break; } + case G_BR: + OperandsMapping = getOperandsMapping({nullptr}); + break; + case G_BRCOND: + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + break; default: return getInvalidInstructionMapping(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index f41da3e8e223..22ce949367f3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -47,6 +47,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. 
const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index cc7a7c3849bc..81b0aa7f8b98 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -515,8 +515,9 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); + bool isSelectOp = MI.getOpcode() == BPF::Select; - assert(MI.getOpcode() == BPF::Select && "Unexpected instr type to insert"); + assert((isSelectOp || MI.getOpcode() == BPF::Select_Ri) && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg @@ -548,48 +549,40 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Insert Branch if Flag unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); int CC = MI.getOperand(3).getImm(); + int NewCC; switch (CC) { case ISD::SETGT: - BuildMI(BB, DL, TII.get(BPF::JSGT_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JSGT_rr : BPF::JSGT_ri; break; case ISD::SETUGT: - BuildMI(BB, DL, TII.get(BPF::JUGT_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JUGT_rr : BPF::JUGT_ri; break; case ISD::SETGE: - BuildMI(BB, DL, TII.get(BPF::JSGE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JSGE_rr : BPF::JSGE_ri; break; case ISD::SETUGE: - BuildMI(BB, DL, TII.get(BPF::JUGE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? 
BPF::JUGE_rr : BPF::JUGE_ri; break; case ISD::SETEQ: - BuildMI(BB, DL, TII.get(BPF::JEQ_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JEQ_rr : BPF::JEQ_ri; break; case ISD::SETNE: - BuildMI(BB, DL, TII.get(BPF::JNE_rr)) - .addReg(LHS) - .addReg(RHS) - .addMBB(Copy1MBB); + NewCC = isSelectOp ? BPF::JNE_rr : BPF::JNE_ri; break; default: report_fatal_error("unimplemented select CondCode " + Twine(CC)); } + if (isSelectOp) + BuildMI(BB, DL, TII.get(NewCC)) + .addReg(LHS) + .addReg(MI.getOperand(2).getReg()) + .addMBB(Copy1MBB); + else + BuildMI(BB, DL, TII.get(NewCC)) + .addReg(LHS) + .addImm(MI.getOperand(2).getImm()) + .addMBB(Copy1MBB); // Copy0MBB: // %FalseValue = ... diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index 5ad777268208..f68357809add 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -460,6 +460,11 @@ let usesCustomInserter = 1 in { "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", [(set i64:$dst, (BPFselectcc i64:$lhs, i64:$rhs, (i64 imm:$imm), i64:$src, i64:$src2))]>; + def Select_Ri : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? 
$src : $src2", + [(set i64:$dst, + (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index b064778c4bbd..d75d95a6baea 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexbit" - #include "HexagonBitTracker.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" @@ -42,6 +40,8 @@ #include #include +#define DEBUG_TYPE "hexbit" + using namespace llvm; static cl::opt PreserveTiedOps("hexbit-keep-tied", cl::Hidden, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index 2dc74632e9be..30ebf89c9808 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -45863,6 +45863,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000000; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dccleaninva : HInst< (outs), @@ -45872,6 +45873,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000010; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dcfetch : HInst< (outs), @@ -45900,6 +45902,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000001; let isSoloAin1 = 1; +let hasSideEffects = 1; } def Y2_dczeroa : HInst< (outs), @@ -45909,6 +45912,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000110; let isSoloAin1 = 1; +let hasSideEffects = 1; let mayStore = 1; } def Y2_icinva : HInst< diff --git 
a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 03c4a83594b3..80361015e649 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -59,8 +59,6 @@ // J2_jump , %PC // Successors according to CFG: BB#6 BB#3 -#define DEBUG_TYPE "hexagon-eif" - #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" @@ -90,6 +88,8 @@ #include #include +#define DEBUG_TYPE "hexagon-eif" + using namespace llvm; namespace llvm { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 734f3c6658d9..a2f6dd68c1a1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -86,8 +86,6 @@ // however, is that finding the locations where the implicit uses need // to be added, and updating the live ranges will be more involved. 
-#define DEBUG_TYPE "expand-condsets" - #include "HexagonInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -116,6 +114,8 @@ #include #include +#define DEBUG_TYPE "expand-condsets" + using namespace llvm; static cl::opt OptTfrLimit("expand-condsets-tfr-limit", diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index c790579ccebc..e5e75198b2d1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -8,8 +8,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-pei" - #include "HexagonFrameLowering.h" #include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" @@ -63,6 +61,8 @@ #include #include +#define DEBUG_TYPE "hexagon-pei" + // Hexagon stack frame layout as defined by the ABI: // // Incoming arguments diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index bf31e1699284..0a955aedaf1a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexinsert" - #include "BitTracker.h" #include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" @@ -44,6 +42,8 @@ #include #include +#define DEBUG_TYPE "hexinsert" + using namespace llvm; static cl::opt VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index 3470480d607d..2da211563e0a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -7,8 +7,6 @@ // 
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "gen-pred" - #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SetVector.h" @@ -35,6 +33,8 @@ #include #include +#define DEBUG_TYPE "gen-pred" + using namespace llvm; namespace llvm { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 67242764d453..3997702bc962 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1286,16 +1286,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -// Creates a SPLAT instruction for a constant value VAL. -static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT, - SDValue Val) { - EVT T = VT.getVectorElementType(); - if (T == MVT::i8 || T == MVT::i16) - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, Val); - - return SDValue(); -} - static bool isSExtFree(SDValue N) { // A sign-extend of a truncate of a sign-extend is free. if (N.getOpcode() == ISD::TRUNCATE && @@ -1374,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// Handle only specific vector loads. -SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - LoadSDNode *LoadNode = cast(Op); - SDValue Chain = LoadNode->getChain(); - SDValue Ptr = Op.getOperand(1); - SDValue LoweredLoad; - SDValue Result; - SDValue Base = LoadNode->getBasePtr(); - ISD::LoadExtType Ext = LoadNode->getExtensionType(); - unsigned Alignment = LoadNode->getAlignment(); - SDValue LoadChain; - - if(Ext == ISD::NON_EXTLOAD) - Ext = ISD::ZEXTLOAD; - - if (VT == MVT::v4i16) { - if (Alignment == 2) { - SDValue Loads[4]; - // Base load. 
- Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base+2 load. - SDValue Increment = DAG.getConstant(2, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base and base+2. - SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); - SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); - SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); - // Base + 4. - Increment = DAG.getConstant(4, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base + 6. - Increment = DAG.getConstant(6, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base+4 and base+6. - Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); - SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); - // Combine to i64. This could be optimised out later if we can - // affect reg allocation of this code. - Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); - LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - Loads[0].getValue(1), Loads[1].getValue(1), - Loads[2].getValue(1), Loads[3].getValue(1)); - } else { - // Perform default type expansion. 
- Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), - LoadNode->getAlignment(), - LoadNode->getMemOperand()->getFlags()); - LoadChain = Result.getValue(1); - } - } else - llvm_unreachable("Custom lowering unsupported load"); - - Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); - // Since we pretend to lower a load, we need the original chain - // info attached to the result. - SDValue Ops[] = { Result, LoadChain }; - - return DAG.getMergeValues(Ops, DL); -} - SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1971,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Handling of vector operations. // - // Custom lower v4i16 load only. Let v4i16 store to be - // promoted for now. promoteLdStType(MVT::v4i8, MVT::i32); promoteLdStType(MVT::v2i16, MVT::i32); promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v4i16, MVT::i64); promoteLdStType(MVT::v2i32, MVT::i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Custom); - setOperationAction(ISD::STORE, MVT::v4i16, Promote); - AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); - AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); - // Set the action for vector operations to "expand", then override it with // either "custom" or "legal" for specific cases. 
static const unsigned VectExpOps[] = { @@ -2301,7 +2212,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; - case HexagonISD::VPACK: return "HexagonISD::VPACK"; + case HexagonISD::VPACKE: return "HexagonISD::VPACKE"; + case HexagonISD::VPACKO: return "HexagonISD::VPACKO"; case HexagonISD::VASL: return "HexagonISD::VASL"; case HexagonISD::VASR: return "HexagonISD::VASR"; case HexagonISD::VLSR: return "HexagonISD::VLSR"; @@ -2394,7 +2306,7 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return createSplat(DAG, dl, VT, V1.getOperand(0)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR // (and probably will turn into a SCALAR_TO_VECTOR once legalization @@ -2409,28 +2321,26 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) } } if (IsScalarToVector) - return createSplat(DAG, dl, VT, V1.getOperand(0)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); } - return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, + DAG.getConstant(Lane, dl, MVT::i32)); } if (UseHVX) { ArrayRef Mask = SVN->getMask(); size_t MaskLen = Mask.size(); - int ElemSizeInBits = VT.getScalarSizeInBits(); - if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) || - (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) { - // Return 1 for odd and 2 of even - StridedLoadKind Pattern = isStridedLoad(Mask); + unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen; + if ((Subtarget.useHVXSglOps() && SizeInBits == 64 * 8) || + 
(Subtarget.useHVXDblOps() && SizeInBits == 128 * 8)) { + StridedLoadKind Pattern = isStridedLoad(Mask); if (Pattern == StridedLoadKind::NoPattern) return SDValue(); - SDValue Vec0 = Op.getOperand(0); - SDValue Vec1 = Op.getOperand(1); - SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32); - SDValue Ops[] = { Vec1, Vec0, StridePattern }; - return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops); + unsigned Opc = Pattern == StridedLoadKind::Even ? HexagonISD::VPACKE + : HexagonISD::VPACKO; + return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)}); } // We used to assert in the "else" part here, but that is bad for Halide // Halide creates intermediate double registers by interleaving two @@ -2531,19 +2441,26 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Size > 64) return SDValue(); - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; unsigned NElts = BVN->getNumOperands(); // Try to generate a SPLAT instruction. - if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && - (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { - unsigned SplatBits = APSplatBits.getZExtValue(); - int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> - (32 - SplatBitSize)); - return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32)); + if (VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v2i32) { + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, false)) { + if (SplatBitSize == VT.getVectorElementType().getSizeInBits()) { + unsigned ZV = APSplatBits.getZExtValue(); + assert(SplatBitSize <= 32 && "Can only handle up to i32"); + // Sign-extend the splat value from SplatBitSize to 32. + int32_t SV = SplatBitSize < 32 + ? 
int32_t(ZV << (32-SplatBitSize)) >> (32-SplatBitSize) + : int32_t(ZV); + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, + DAG.getConstant(SV, dl, MVT::i32)); + } + } } // Try to generate COMBINE to build v2i32 vectors. @@ -2974,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - // Custom lower some vector loads. - case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index bfd2c94eeaba..d66cbc95e918 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -62,7 +62,8 @@ namespace HexagonISD { EXTRACTU, EXTRACTURP, VCOMBINE, - VPACK, + VPACKE, + VPACKO, TC_RETURN, EH_RETURN, DCFETCH, @@ -164,7 +165,6 @@ namespace HexagonISD { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index c611857ec26a..104a28654dd5 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -1366,6 +1366,18 @@ defm : MaskedStore ; defm : MaskedStore ; defm : MaskedStore ; +//******************************************************************* +// SYSTEM 
+//******************************************************************* + +def: T_R_pat; +def: T_R_pat; +def: T_R_pat; +def: T_R_pat; + +def: T_RR_pat; +def: T_RP_pat; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index a331c978f59d..374ffa3799b0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -10,8 +10,6 @@ // load/store instructions. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "opt-addr-mode" - #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "MCTargetDesc/HexagonBaseInfo.h" @@ -36,6 +34,8 @@ #include #include +#define DEBUG_TYPE "opt-addr-mode" + static cl::opt CodeGrowthLimit("hexagon-amode-growth-limit", cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " "optimization")); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td index ba98b8994937..804a547d5b33 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -2250,6 +2250,12 @@ def: Storea_pat, I32, addrgp, PS_storerhabs>; def: Storea_pat, I32, addrgp, PS_storeriabs>; def: Storea_pat, I64, addrgp, PS_storerdabs>; +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word. 
+let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (A2_combinew I32:$a, I32:$b)>; + def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), (i64 (zext (i32 (and I32:$a, (i32 65535)))))), (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), @@ -2971,45 +2977,40 @@ def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, Requires<[UseHVXDbl]>; -def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, - SDTCisInt<3>]>; +def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; -def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; +def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; +def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; -// 0 as the last argument denotes vpacke. 1 denotes vpacko -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 0))), - (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 1))), - (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 0))), - (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 1))), - (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; +let Predicates = [UseHVXSgl] in { + def: Pat<(v64i8 (HexagonVPACKE (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v64i8 (HexagonVPACKO (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v32i16 (HexagonVPACKE (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>; + def: 
Pat<(v32i16 (HexagonVPACKO (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>; +} -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 1))), - (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 1))), - (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; +let Predicates = [UseHVXDbl] in { + def: Pat<(v128i8 (HexagonVPACKE (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v128i8 (HexagonVPACKO (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKE (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKO (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; +} def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; @@ -3073,6 +3074,10 @@ def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; // four halfwords of 64-bits destination register. 
def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), + (A2_combineii imm:$s8, imm:$s8)>; +def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>; + class VArith_pat : Pat <(Op Type:$Rss, Type:$Rtt), @@ -3099,14 +3104,11 @@ def: VArith_pat ; def: VArith_pat ; def: VArith_pat ; -def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asl_i_vw V2I32:$b, imm:$c)>; def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 34df2ebcc520..ea86c9c42f47 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -53,6 +53,10 @@ static cl::opt EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false), cl::desc("Emit hexagon jump tables in function section")); +static cl::opt + EmitLutInText("hexagon-emit-lut-text", cl::Hidden, cl::init(false), + cl::desc("Emit hexagon lookup tables in function section")); + // TraceGVPlacement controls messages for all builds. For builds with assertions // (debug or release), messages are also controlled by the usual debug flags // (e.g. -debug and -debug-only=globallayout) @@ -136,6 +140,13 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal( << (Kind.isBSS() ? 
"kind_bss " : "" ) << (Kind.isBSSLocal() ? "kind_bss_local " : "" )); + // If the lookup table is used by more than one function, do not place + // it in text section. + if (EmitLutInText && GO->getName().startswith("switch.table")) { + if (const Function *Fn = getLutUsedFunction(GO)) + return selectSectionForLookupTable(GO, TM, Fn); + } + if (isGlobalInSmallSection(GO, TM)) return selectSmallSectionForGlobal(GO, Kind, TM); @@ -402,3 +413,39 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); } + +// Return the function that uses the lookup table. If there are more +// than one live function that uses this look table, bail out and place +// the lookup table in default section. +const Function * +HexagonTargetObjectFile::getLutUsedFunction(const GlobalObject *GO) const { + const Function *ReturnFn = nullptr; + for (auto U : GO->users()) { + // validate each instance of user to be a live function. + auto *I = dyn_cast(U); + if (!I) + continue; + auto *Bb = I->getParent(); + if (!Bb) + continue; + auto *UserFn = Bb->getParent(); + if (!ReturnFn) + ReturnFn = UserFn; + else if (ReturnFn != UserFn) + return nullptr; + } + return ReturnFn; +} + +MCSection *HexagonTargetObjectFile::selectSectionForLookupTable( + const GlobalObject *GO, const TargetMachine &TM, const Function *Fn) const { + + SectionKind Kind = SectionKind::getText(); + // If the function has explicit section, place the lookup table in this + // explicit section. 
+ if (Fn->hasSection()) + return getExplicitSectionGlobal(Fn, Kind, TM); + + const auto *FuncObj = dyn_cast(Fn); + return SelectSectionForGlobal(FuncObj, Kind, TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h index 373d850b53be..eff44f097e03 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -36,6 +36,8 @@ namespace llvm { bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, const Function &F) const override; + const Function *getLutUsedFunction(const GlobalObject *GO) const; + private: MCSectionELF *SmallDataSection; MCSectionELF *SmallBSSSection; @@ -46,6 +48,10 @@ namespace llvm { MCSection *selectSmallSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const; + + MCSection *selectSectionForLookupTable(const GlobalObject *GO, + const TargetMachine &TM, + const Function *Fn) const; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index b72c9d534478..e12188e70602 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -304,9 +304,6 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); - bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI); - bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -2514,16 +2511,6 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::SEQIMacro: return expandSeqI(Inst, IDLoc, Out, STI) ? 
MER_Fail : MER_Success; - case Mips::MFTC0: case Mips::MTTC0: - case Mips::MFTGPR: case Mips::MTTGPR: - case Mips::MFTLO: case Mips::MTTLO: - case Mips::MFTHI: case Mips::MTTHI: - case Mips::MFTACX: case Mips::MTTACX: - case Mips::MFTDSP: case Mips::MTTDSP: - case Mips::MFTC1: case Mips::MTTC1: - case Mips::MFTHC1: case Mips::MTTHC1: - case Mips::CFTC1: case Mips::CTTC1: - return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; } } @@ -4895,212 +4882,6 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } -// Map the DSP accumulator and control register to the corresponding gpr -// operand. Unlike the other alias, the m(f|t)t(lo|hi|acx) instructions -// do not map the DSP registers contigously to gpr registers. -static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) { - switch (Inst.getOpcode()) { - case Mips::MFTLO: - case Mips::MTTLO: - switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { - case Mips::AC0: - return Mips::ZERO; - case Mips::AC1: - return Mips::A0; - case Mips::AC2: - return Mips::T0; - case Mips::AC3: - return Mips::T4; - default: - llvm_unreachable("Unknown register for 'mttr' alias!"); - } - case Mips::MFTHI: - case Mips::MTTHI: - switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { - case Mips::AC0: - return Mips::AT; - case Mips::AC1: - return Mips::A1; - case Mips::AC2: - return Mips::T1; - case Mips::AC3: - return Mips::T5; - default: - llvm_unreachable("Unknown register for 'mttr' alias!"); - } - case Mips::MFTACX: - case Mips::MTTACX: - switch (Inst.getOperand(IsMFDSP ? 
1 : 0).getReg()) { - case Mips::AC0: - return Mips::V0; - case Mips::AC1: - return Mips::A2; - case Mips::AC2: - return Mips::T2; - case Mips::AC3: - return Mips::T6; - default: - llvm_unreachable("Unknown register for 'mttr' alias!"); - } - case Mips::MFTDSP: - case Mips::MTTDSP: - return Mips::S0; - default: - llvm_unreachable("Unknown instruction for 'mttr' dsp alias!"); - } -} - -// Map the floating point register operand to the corresponding register -// operand. -static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) { - switch (Inst.getOperand(IsMFTC1 ? 1 : 0).getReg()) { - case Mips::F0: return Mips::ZERO; - case Mips::F1: return Mips::AT; - case Mips::F2: return Mips::V0; - case Mips::F3: return Mips::V1; - case Mips::F4: return Mips::A0; - case Mips::F5: return Mips::A1; - case Mips::F6: return Mips::A2; - case Mips::F7: return Mips::A3; - case Mips::F8: return Mips::T0; - case Mips::F9: return Mips::T1; - case Mips::F10: return Mips::T2; - case Mips::F11: return Mips::T3; - case Mips::F12: return Mips::T4; - case Mips::F13: return Mips::T5; - case Mips::F14: return Mips::T6; - case Mips::F15: return Mips::T7; - case Mips::F16: return Mips::S0; - case Mips::F17: return Mips::S1; - case Mips::F18: return Mips::S2; - case Mips::F19: return Mips::S3; - case Mips::F20: return Mips::S4; - case Mips::F21: return Mips::S5; - case Mips::F22: return Mips::S6; - case Mips::F23: return Mips::S7; - case Mips::F24: return Mips::T8; - case Mips::F25: return Mips::T9; - case Mips::F26: return Mips::K0; - case Mips::F27: return Mips::K1; - case Mips::F28: return Mips::GP; - case Mips::F29: return Mips::SP; - case Mips::F30: return Mips::FP; - case Mips::F31: return Mips::RA; - default: llvm_unreachable("Unknown register for mttc1 alias!"); - } -} - -// Map the coprocessor operand the corresponding gpr register operand. -static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) { - switch (Inst.getOperand(IsMFTC0 ? 
1 : 0).getReg()) { - case Mips::COP00: return Mips::ZERO; - case Mips::COP01: return Mips::AT; - case Mips::COP02: return Mips::V0; - case Mips::COP03: return Mips::V1; - case Mips::COP04: return Mips::A0; - case Mips::COP05: return Mips::A1; - case Mips::COP06: return Mips::A2; - case Mips::COP07: return Mips::A3; - case Mips::COP08: return Mips::T0; - case Mips::COP09: return Mips::T1; - case Mips::COP010: return Mips::T2; - case Mips::COP011: return Mips::T3; - case Mips::COP012: return Mips::T4; - case Mips::COP013: return Mips::T5; - case Mips::COP014: return Mips::T6; - case Mips::COP015: return Mips::T7; - case Mips::COP016: return Mips::S0; - case Mips::COP017: return Mips::S1; - case Mips::COP018: return Mips::S2; - case Mips::COP019: return Mips::S3; - case Mips::COP020: return Mips::S4; - case Mips::COP021: return Mips::S5; - case Mips::COP022: return Mips::S6; - case Mips::COP023: return Mips::S7; - case Mips::COP024: return Mips::T8; - case Mips::COP025: return Mips::T9; - case Mips::COP026: return Mips::K0; - case Mips::COP027: return Mips::K1; - case Mips::COP028: return Mips::GP; - case Mips::COP029: return Mips::SP; - case Mips::COP030: return Mips::FP; - case Mips::COP031: return Mips::RA; - default: llvm_unreachable("Unknown register for mttc0 alias!"); - } -} - -/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing -/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits. 
-bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI) { - MipsTargetStreamer &TOut = getTargetStreamer(); - unsigned rd = 0; - unsigned u = 1; - unsigned sel = 0; - unsigned h = 0; - bool IsMFTR = false; - switch (Inst.getOpcode()) { - case Mips::MFTC0: - IsMFTR = true; - LLVM_FALLTHROUGH; - case Mips::MTTC0: - u = 0; - rd = getRegisterForMxtrC0(Inst, IsMFTR); - sel = Inst.getOperand(2).getImm(); - break; - case Mips::MFTGPR: - IsMFTR = true; - LLVM_FALLTHROUGH; - case Mips::MTTGPR: - rd = Inst.getOperand(IsMFTR ? 1 : 0).getReg(); - break; - case Mips::MFTLO: - case Mips::MFTHI: - case Mips::MFTACX: - case Mips::MFTDSP: - IsMFTR = true; - LLVM_FALLTHROUGH; - case Mips::MTTLO: - case Mips::MTTHI: - case Mips::MTTACX: - case Mips::MTTDSP: - rd = getRegisterForMxtrDSP(Inst, IsMFTR); - sel = 1; - break; - case Mips::MFTHC1: - h = 1; - LLVM_FALLTHROUGH; - case Mips::MFTC1: - IsMFTR = true; - rd = getRegisterForMxtrFP(Inst, IsMFTR); - sel = 2; - break; - case Mips::MTTHC1: - h = 1; - LLVM_FALLTHROUGH; - case Mips::MTTC1: - rd = getRegisterForMxtrFP(Inst, IsMFTR); - sel = 2; - break; - case Mips::CFTC1: - IsMFTR = true; - LLVM_FALLTHROUGH; - case Mips::CTTC1: - rd = getRegisterForMxtrFP(Inst, IsMFTR); - sel = 3; - break; - } - unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; - unsigned Op1 = - IsMFTR ? rd - : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() - : Inst.getOperand(0).getReg()); - - TOut.emitRRIII(IsMFTR ? 
Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc, - STI); - return false; -} - unsigned MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 7caeb08589af..2907b7715857 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -193,21 +193,6 @@ void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } -void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, - unsigned Reg1, int16_t Imm0, int16_t Imm1, - int16_t Imm2, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - MCInst TmpInst; - TmpInst.setOpcode(Opcode); - TmpInst.addOperand(MCOperand::createReg(Reg0)); - TmpInst.addOperand(MCOperand::createReg(Reg1)); - TmpInst.addOperand(MCOperand::createImm(Imm0)); - TmpInst.addOperand(MCOperand::createImm(Imm1)); - TmpInst.addOperand(MCOperand::createImm(Imm2)); - TmpInst.setLoc(IDLoc); - getStreamer().EmitInstruction(TmpInst, *STI); -} - void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td index d2f0fdcc6cc1..6ceb05577538 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.td +++ b/contrib/llvm/lib/Target/Mips/Mips.td @@ -190,6 +190,9 @@ def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; +def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", + "Disable use of the jal instruction">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index a6ec9fb2e598..20319f85696c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -364,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); + if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) { + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + } + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -469,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); @@ -923,14 +936,127 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, } } +static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, + const MipsSubtarget &Subtarget) { + // ROOTNode must have a multiplication as an operand for the match to be + // successful. + if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && + ROOTNode->getOperand(1).getOpcode() != ISD::MUL) + return SDValue(); + + // We don't handle vector types here. 
+ if (ROOTNode->getValueType(0).isVector()) + return SDValue(); + + // For MIPS64, madd / msub instructions are inefficent to use with 64 bit + // arithmetic. E.g. + // (add (mul a b) c) => + // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in + // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) + // or + // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) + // + // The overhead of setting up the Hi/Lo registers and reassembling the + // result makes this a dubious optimzation for MIPS64. The core of the + // problem is that Hi/Lo contain the upper and lower 32 bits of the + // operand and result. + // + // It requires a chain of 4 add/mul for MIPS64R2 to get better code + // density than doing it naively, 5 for MIPS64. Additionally, using + // madd/msub on MIPS64 requires the operands actually be 32 bit sign + // extended operands, not true 64 bit values. + // + // FIXME: For the moment, disable this completely for MIPS64. + if (Subtarget.hasMips64()) + return SDValue(); + + SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(0) + : ROOTNode->getOperand(1); + + SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(1) + : ROOTNode->getOperand(0); + + // Transform this to a MADD only if the user of this node is the add. + // If there are other users of the mul, this function returns here. + if (!Mult.hasOneUse()) + return SDValue(); + + // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 + // must be in canonical form, i.e. sign extended. For MIPS32, the operands + // of the multiply must have 32 or more sign bits, otherwise we cannot + // perform this optimization. We have to check this here as we're performing + // this optimization pre-legalization. 
+ SDValue MultLHS = Mult->getOperand(0); + SDValue MultRHS = Mult->getOperand(1); + + bool IsSigned = MultLHS->getOpcode() == ISD::SIGN_EXTEND && + MultRHS->getOpcode() == ISD::SIGN_EXTEND; + bool IsUnsigned = MultLHS->getOpcode() == ISD::ZERO_EXTEND && + MultRHS->getOpcode() == ISD::ZERO_EXTEND; + + if (!IsSigned && !IsUnsigned) + return SDValue(); + + // Initialize accumulator. + SDLoc DL(ROOTNode); + SDValue TopHalf; + SDValue BottomHalf; + BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(0, DL)); + + TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(1, DL)); + SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + BottomHalf, + TopHalf); + + // Create MipsMAdd(u) / MipsMSub(u) node. + bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; + unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) + : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub); + SDValue MAddOps[3] = { + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; + EVT VTs[2] = {MVT::i32, MVT::i32}; + SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); + + SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + SDValue Combined = + CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); + return Combined; +} + +static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); + + return SDValue(); + } + + return SDValue(); +} + static SDValue performADDCombine(SDNode *N, 
SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + // (add v0 (mul v1, v2)) => (madd v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); - if (DCI.isBeforeLegalizeOps()) return SDValue(); + } + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -1058,6 +1184,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SUB: + return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -3021,6 +3149,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT Ty = Callee.getValueType(); bool GlobalOrExternal = false, IsCallReloc = false; + // The long-calls feature is ignored in case of PIC. + // While we do not support -mshared / -mno-shared properly, + // ignore long-calls in case of -mabicalls too. + if (Subtarget.useLongCalls() && !Subtarget.isABICalls() && !IsPIC) { + // Get the address of the callee into a register to prevent + // using of the `jal` instruction for the direct call. + if (auto *N = dyn_cast(Callee)) + Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + else if (auto *N = dyn_cast(Callee)) + Callee = Subtarget.hasSym32() ? 
getAddrNonPIC(N, SDLoc(N), Ty, DAG) + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + } + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPIC) { const GlobalValue *Val = G->getGlobal(); diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td index 94f3a74be98b..0333fe6520fa 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -443,8 +443,17 @@ let AdditionalPredicates = [NotInMicroMips] in { } def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, MFC1_FM<0>; +def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>, + FGR_64 { + let DecoderNamespace = "Mips64"; +} def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM<4>; +def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>, + FGR_64 { + let DecoderNamespace = "Mips64"; +} + let AdditionalPredicates = [NotInMicroMips] in { def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2, FGR_32; diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td index edc0981e6278..64bee5bfba18 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrFormats.td @@ -35,8 +35,6 @@ class FIELD5 Val> { def FIELD5_1_DMT_EMT : FIELD5<0b00001>; def FIELD5_2_DMT_EMT : FIELD5<0b01111>; def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; -def FIELD5_MFTR : FIELD5<0b01000>; -def FIELD5_MTTR : FIELD5<0b01100>; class COP0_MFMC0_MT : MipsMTInst { bits<32> Inst; @@ -52,25 +50,6 @@ class COP0_MFMC0_MT : MipsMTInst { let Inst{2-0} = 0b001; } -class COP0_MFTTR_MT : MipsMTInst { - bits<32> Inst; - - bits<5> rt; - bits<5> rd; - bits<1> u; - bits<1> h; - bits<3> sel; - let Inst{31-26} = 0b010000; // COP0 - let Inst{25-21} = Op.Value; // MFMC0 - let Inst{20-16} = rt; - let Inst{15-11} = rd; - let Inst{10-6} 
= 0b00000; // rx - currently unsupported. - let Inst{5} = u; - let Inst{4} = h; - let Inst{3} = 0b0; - let Inst{2-0} = sel; -} - class SPECIAL3_MT_FORK : MipsMTInst { bits<32> Inst; diff --git a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td index 72e626cbec40..ab6693f60fd9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsMTInstrInfo.td @@ -6,13 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file describes the MIPS MT ASE as defined by MD00378 1.12. -// -// TODO: Add support for the microMIPS encodings for the MT ASE and add the -// instruction mappings. -// -//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // MIPS MT Instruction Encodings @@ -34,10 +27,6 @@ class FORK_ENC : SPECIAL3_MT_FORK; class YIELD_ENC : SPECIAL3_MT_YIELD; -class MFTR_ENC : COP0_MFTTR_MT; - -class MTTR_ENC : COP0_MFTTR_MT; - //===----------------------------------------------------------------------===// // MIPS MT Instruction Descriptions //===----------------------------------------------------------------------===// @@ -50,22 +39,6 @@ class MT_1R_DESC_BASE { InstrItinClass Itinerary = Itin; } -class MFTR_DESC { - dag OutOperandList = (outs GPR32Opnd:$rd); - dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); - string AsmString = "mftr\t$rd, $rt, $u, $sel, $h"; - list Pattern = []; - InstrItinClass Itinerary = II_MFTR; -} - -class MTTR_DESC { - dag OutOperandList = (outs GPR32Opnd:$rd); - dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); - string AsmString = "mttr\t$rt, $rd, $u, $sel, $h"; - list Pattern = []; - InstrItinClass Itinerary = II_MTTR; -} - class FORK_DESC { dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); dag 
InOperandList = (ins GPR32Opnd:$rt); @@ -106,73 +79,8 @@ let hasSideEffects = 1, isNotDuplicable = 1, def FORK : FORK_ENC, FORK_DESC, ASE_MT; def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; - - def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT; - - def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT; } -//===----------------------------------------------------------------------===// -// MIPS MT Pseudo Instructions - used to support mtfr & mttr aliases. -//===----------------------------------------------------------------------===// -def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt, - uimm3:$sel), - "mftc0 $rd, $rt, $sel">, ASE_MT; - -def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt, - uimm3:$sel), - "mftgpr $rd, $rt">, ASE_MT; - -def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), - "mftlo $rt, $ac">, ASE_MT; - -def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), - "mfthi $rt, $ac">, ASE_MT; - -def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), - "mftacx $rt, $ac">, ASE_MT; - -def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins), - "mftdsp $rt">, ASE_MT; - -def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), - "mftc1 $rt, $ft">, ASE_MT; - -def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), - "mfthc1 $rt, $ft">, ASE_MT; - -def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft), - "cftc1 $rt, $ft">, ASE_MT; - - -def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt, - uimm3:$sel), - "mttc0 $rt, $rd, $sel">, ASE_MT; - -def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd), - "mttgpr $rd, $rt">, ASE_MT; - -def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), - "mttlo $rt, $ac">, ASE_MT; - -def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), - "mtthi $rt, $ac">, ASE_MT; - -def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins 
GPR32Opnd:$rt), - "mttacx $rt, $ac">, ASE_MT; - -def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt), - "mttdsp $rt">, ASE_MT; - -def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), - "mttc1 $rt, $ft">, ASE_MT; - -def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), - "mtthc1 $rt, $ft">, ASE_MT; - -def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt), - "cttc1 $rt, $ft">, ASE_MT; - //===----------------------------------------------------------------------===// // MIPS MT Instruction Definitions //===----------------------------------------------------------------------===// @@ -187,22 +95,4 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; - - def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0), - 1>, ASE_MT; - - def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT; - - def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT; - - def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT; - - def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0), - 1>, ASE_MT; - - def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT; - - def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT; - - def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT; } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 49ae6dd4cd39..4be26dd25dc0 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, - SDValue CmpLHS, const SDLoc &DL, - SDNode 
*Node) const { - unsigned Opc = InFlag.getOpcode(); (void)Opc; - - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; - if (Subtarget->isGP64bit()) { - SLTuOp = Mips::SLTu64; - ADDuOp = Mips::DADDu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; +void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = InFlag.getOpcode(); SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); - - if (Subtarget->isGP64bit()) { - // On 64-bit targets, sltu produces an i64 but our backend currently says - // that SLTu64 produces an i32. We need to fix this in the long run but for - // now, just make the DAG type-correct by asserting the upper bits are zero. - Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, - CurDAG->getTargetConstant(0, DL, VT), - SDValue(Carry, 0), - CurDAG->getTargetConstant(Mips::sub_32, DL, - VT)); + // In the base case, we can rely on the carry bit from the addsc + // instruction. + if (Opc == ISD::ADDC) { + SDValue Ops[3] = {LHS, RHS, InFlag}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); + return; } - // Generate a second addition only if we know that RHS is not a - // constant-zero node. - SDNode *AddCarry = Carry; - ConstantSDNode *C = dyn_cast(RHS); - if (!C || C->getZExtValue()) - AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); + assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); - CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); + // The more complex case is when there is a chain of ISD::ADDE nodes like: + // (adde (adde (adde (addc a b) c) d) e). 
+ // + // The addwc instruction does not write to the carry bit, instead it writes + // to bit 20 of the dsp control register. To match this series of nodes, each + // intermediate adde node must be expanded to write the carry bit before the + // addition. + + // Start by reading the overflow field for addsc and moving the value to the + // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP + // corresponds to reading/writing the entire control register to/from a GPR. + + SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); + + SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); + + SDNode *DSPCtrlField = + CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); + + SDNode *Carry = CurDAG->getMachineNode( + Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); + + SDValue Ops[4] = {SDValue(DSPCtrlField, 0), + CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, + SDValue(Carry, 0)}; + SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); + + // My reading of the MIPS DSP 3.01 specification isn't as clear as I + // would like about whether bit 20 always gets overwritten by addwc. + // Hence take an extremely conservative view and presume it's sticky. We + // therefore need to clear it. + + SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; + SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); + + SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, + SDValue(DSPCtrlFinal, 0), CstOne); + + SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); } /// Match frameindex @@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { switch(Opcode) { default: break; - case ISD::SUBE: { - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ?
Mips::DSUBu : Mips::SUBu; - selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); - return true; - } - case ISD::ADDE: { - if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. - break; - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu; - selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); + selectAddE(Node, DL); return true; } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index f89a350cab04..6f38289c5a45 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,8 +41,7 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, - const SDLoc &DL, SDNode *Node) const; + void selectAddE(SDNode *Node, const SDLoc &DL) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 06a97b9d123e..72b2738bfac4 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, 
Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(ADDENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - ADDCNode->getOperand(1), - ADDENode->getOperand(1)); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); - } - if (!SDValue(ADDENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); - } - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. 
- // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(SUBENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - SUBCNode->getOperand(0), - SUBENode->getOperand(0)); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); - } - if (!SDValue(SUBENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); - } - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && - 
selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: @@ -3596,9 +3420,17 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rs = RegInfo.createVirtualRegister(RC); + unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); + if(!UsingMips32) { + unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) + .addImm(0) + .addReg(Rs) + .addImm(Mips::sub_32); + Rs = Tmp; + } BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? 
Mips::SH : Mips::SH64)) .addReg(Rs) .addReg(Rt) @@ -3649,6 +3481,12 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, for (unsigned i = 1; i < MI.getNumOperands(); i++) MIB.add(MI.getOperand(i)); + if(!UsingMips32) { + unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); + Rt = Tmp; + } + BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); MI.eraseFromParent(); @@ -3716,6 +3554,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; + bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); @@ -3726,7 +3565,9 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; + unsigned MFC1Opc = IsFGR64onMips64 + ? Mips::DMFC1 + : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; // Perform the register class copy as mentioned above. @@ -3735,7 +3576,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); unsigned WPHI = Wtemp; - if (!Subtarget.hasMips64() && IsFGR64) { + if (IsFGR64onMips32) { unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); @@ -3829,7 +3670,9 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? 
&Mips::GPR64RegClass : &Mips::GPR32RegClass; - unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; + unsigned MTC1Opc = IsFGR64onMips64 + ? Mips::DMTC1 + : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td index 8ec55ab6284d..c2947bb44ef5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td +++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td @@ -226,7 +226,6 @@ def II_MFC1 : InstrItinClass; def II_MFHC1 : InstrItinClass; def II_MFC2 : InstrItinClass; def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo -def II_MFTR : InstrItinClass; def II_MOD : InstrItinClass; def II_MODU : InstrItinClass; def II_MOVE : InstrItinClass; @@ -256,7 +255,6 @@ def II_MTC1 : InstrItinClass; def II_MTHC1 : InstrItinClass; def II_MTC2 : InstrItinClass; def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo -def II_MTTR : InstrItinClass; def II_MUL : InstrItinClass; def II_MUH : InstrItinClass; def II_MUHU : InstrItinClass; @@ -666,14 +664,12 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 7619e7b08612..cce3b8c4c8d1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -152,6 +152,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasMT -- support MT ASE. bool HasMT; + // Disable use of the `jal` instruction. 
+ bool UseLongCalls = false; + InstrItineraryData InstrItins; // We can override the determination of whether we are in mips16 mode @@ -269,6 +272,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo { bool useSoftFloat() const { return IsSoftFloat; } + bool useLongCalls() const { return UseLongCalls; } + bool enableLongBranchPass() const { return hasStandardEncoding() || allowMixed16_32(); } diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h index af24838665e1..7d9f99ce071e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -119,9 +119,6 @@ class MipsTargetStreamer : public MCTargetStreamer { SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, - int16_t Imm1, int16_t Imm2, SMLoc IDLoc, - const MCSubtargetInfo *STI); void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI); void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index a00b56af0490..92c8c224b71b 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -271,7 +271,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12; const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isImm()); + assert(MO.isImm() && !(MO.getImm() % 16) && + "Expecting an immediate that is a multiple of 16"); return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits; } diff --git 
a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3aaf7ef2c2a0..901539b682ba 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -178,7 +178,7 @@ namespace { /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -211,7 +211,11 @@ namespace { /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); + } + + bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); } // Select an address into a single register. 
@@ -305,6 +309,7 @@ namespace { bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); + bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); }; @@ -2999,6 +3004,25 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); } +/// Does this node represent a load/store node whose address can be represented +/// with a register plus an immediate that's a multiple of \p Val: +bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { + LoadSDNode *LDN = dyn_cast(N); + StoreSDNode *STN = dyn_cast(N); + SDValue AddrOp; + if (LDN) + AddrOp = LDN->getOperand(1); + else if (STN) + AddrOp = STN->getOperand(2); + + short Imm = 0; + if (AddrOp.getOpcode() == ISD::ADD) + return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); + + // If the address comes from the outside, the offset will be zero. + return AddrOp.getOpcode() == ISD::CopyFromReg; +} + void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0e069ec1665f..b3a3c73f6df0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2130,12 +2130,12 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. If Aligned is true, only accept displacements -/// suitable for STD and friends, i.e. multiples of 4. +/// represented as reg+reg. If \p Alignment is non-zero, only accept +/// displacements that are multiples of that value. 
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, - bool Aligned) const { + unsigned Alignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. @@ -2145,7 +2145,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, if (N.getOpcode() == ISD::ADD) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -2169,7 +2169,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } else if (N.getOpcode() == ISD::OR) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && - (!Aligned || (imm & 3) == 0)) { + (!Alignment || (imm % Alignment) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -2196,7 +2196,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" int16_t Imm; - if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { + if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -2206,7 +2206,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // Handle 32-bit sext immediates with LIS + addr mode. 
if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && - (!Aligned || (CN->getZExtValue() & 3) == 0)) { + (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. @@ -2321,14 +2321,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4)) return false; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 821927d3b157..49d7d8220af1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -616,7 +616,7 @@ namespace llvm { /// is not better represented as reg+reg. If Aligned is true, only accept /// displacements suitable for STD and friends, i.e. multiples of 4. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG, bool Aligned) const; + SelectionDAG &DAG, unsigned Alignment) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 13b4f9ab962d..e74ba38c351f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1635,8 +1635,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (equalityOnly) { // We need to check the uses of the condition register in order to reject // non-equality comparisons. - for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg), - IE = MRI->use_instr_end(); I != IE; ++I) { + for (MachineRegisterInfo::use_instr_iterator + I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); + I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { unsigned Pred = UseMI->getOperand(0).getImm(); @@ -1658,8 +1659,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL; ++I) { bool FoundUse = false; - for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg), - JE = MRI->use_instr_end(); J != JE; ++J) + for (MachineRegisterInfo::use_instr_iterator + J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end(); + J != JE; ++J) if (&*J == &*I) { FoundUse = true; break; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6d9f55206b6a..dd7fc2659102 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -405,6 +405,25 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ return cast(N)->getAlignment() < 4; }]>; +// This is a somewhat weaker condition than actually checking for 16-byte +// alignment. It is simply checking that the displacement can be represented +// as an immediate that is a multiple of 16 (i.e. 
the requirements for DQ-Form +// instructions). +def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; + //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -815,7 +834,8 @@ def pred : Operand { def iaddr : ComplexPattern; def xaddr : ComplexPattern; def xoaddr : ComplexPattern; -def ixaddr : ComplexPattern; // "std" +def ixaddr : ComplexPattern; // "std" +def iqaddr : ComplexPattern; // "stxv" // The address in a single register. This is used with the SjLj // pseudo-instructions. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 43635a8919e2..942e8b392b82 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2606,37 +2606,41 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { } // IsLittleEndian, HasP9Vector // D-Form Load/Store - def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV 
memrix16:$src)>; + def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>; - def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst), + def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst), + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>; - def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst), - (STXVX $rS, xaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst), - (STXVX $rS, xaddr:$dst)>; + def : Pat<(v2f64 
(nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), @@ -2788,21 +2792,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { let isPseudo = 1 in { def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", - [(set f32:$XT, (load iaddr:$src))]>; + [(set f32:$XT, (load ixaddr:$src))]>; def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", - [(set f64:$XT, (load iaddr:$src))]>; + [(set f64:$XT, (load ixaddr:$src))]>; def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", - [(store f32:$XT, iaddr:$dst)]>; + [(store f32:$XT, ixaddr:$dst)]>; def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", - [(store f64:$XT, iaddr:$dst)]>; + [(store f64:$XT, ixaddr:$dst)]>; } - def : Pat<(f64 (extloadf32 iaddr:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (extloadf32 iaddr:$src))), - (f32 (DFLOADf32 iaddr:$src))>; + 
def : Pat<(f64 (extloadf32 ixaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))), + (f32 (DFLOADf32 ixaddr:$src))>; } // end HasP9Vector, AddedComplexity // Integer extend helper dags 32 -> 64 @@ -2881,13 +2885,13 @@ def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } def FltToLongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); } def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } def FltToULongLoadP9 { - dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A))))); + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); @@ -2911,13 +2915,13 @@ def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } def DblToIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); } def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } def DblToUIntLoadP9 { - dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A))))); + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); } def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); @@ -3088,17 +3092,17 @@ let AddedComplexity = 400 in { (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; + (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS 
(COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddr:$A), + (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8af7f7e98117..9207165c46a6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -754,19 +754,31 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -// Figure out if the offset in the instruction must be a multiple of 4. -// This is true for instructions like "STD". -static bool usesIXAddr(const MachineInstr &MI) { +// If the offset must be a multiple of some value, return what that value is. +static unsigned offsetMinAlign(const MachineInstr &MI) { unsigned OpC = MI.getOpcode(); switch (OpC) { default: - return false; + return 1; case PPC::LWA: case PPC::LWA_32: case PPC::LD: + case PPC::LDU: case PPC::STD: - return true; + case PPC::STDU: + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: + case PPC::LXSD: + case PPC::LXSSP: + case PPC::STXSD: + case PPC::STXSSP: + return 4; + case PPC::LXV: + case PPC::STXV: + return 16; } } @@ -852,9 +864,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister( FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false); - // Figure out if the offset in the instruction is shifted right two bits. - bool isIXAddr = usesIXAddr(MI); - // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP && @@ -883,7 +892,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // happen in invalid code. 
assert(OpC != PPC::DBG_VALUE && "This should be handled in a target-independent way"); - if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) || + if (!noImmForm && ((isInt<16>(Offset) && + ((Offset % offsetMinAlign(MI)) == 0)) || OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -1076,5 +1086,5 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT || - (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); + (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 2dc3828334ac..be705507b534 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,6 +41,8 @@ class PPCTargetMachine final : public LLVMTargetMachine { ~PPCTargetMachine() override; const PPCSubtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. const PPCSubtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td index 11004c5a952f..91cab00b2b65 100644 --- a/contrib/llvm/lib/Target/Sparc/Sparc.td +++ b/contrib/llvm/lib/Target/Sparc/Sparc.td @@ -20,6 +20,10 @@ include "llvm/Target/Target.td" // SPARC Subtarget features. 
// +def FeatureSoftMulDiv + : SubtargetFeature<"soft-mul-div", "UseSoftMulDiv", "true", + "Use software emulation for integer multiply and divide">; + def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">; @@ -75,7 +79,7 @@ class Proc Features> : Processor; def : Proc<"generic", []>; -def : Proc<"v7", []>; +def : Proc<"v7", [FeatureSoftMulDiv]>; def : Proc<"v8", []>; def : Proc<"supersparc", []>; def : Proc<"sparclite", []>; diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 9e7e3c6b705a..6767a59a9757 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1689,6 +1689,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::MUL, MVT::i32, Expand); + if (Subtarget->useSoftMulDiv()) { + // .umul works for both signed and unsigned + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setLibcallName(RTLIB::MUL_I32, ".umul"); + + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setLibcallName(RTLIB::SDIV_I32, ".div"); + + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setLibcallName(RTLIB::UDIV_I32, ".udiv"); + } + if (Subtarget->is64Bit()) { setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td index ae45c8be6752..3194ad4aeb6b 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -27,6 +27,9 @@ def Is32Bit : Predicate<"!Subtarget->is64Bit()">; // True when generating 64-bit code. This also implies HasV9. 
def Is64Bit : Predicate<"Subtarget->is64Bit()">; +def UseSoftMulDiv : Predicate<"Subtarget->useSoftMulDiv()">, + AssemblerPredicate<"FeatureSoftMulDiv">; + // HasV9 - This predicate is true when the target processor supports V9 // instructions. Note that the machine may be running in 32-bit mode. def HasV9 : Predicate<"Subtarget->isV9()">, diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp index 43ddef3cc96e..daac56add87c 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -28,6 +28,7 @@ void SparcSubtarget::anchor() { } SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + UseSoftMulDiv = false; IsV9 = false; IsLeon = false; V8DeprecatedInsts = false; diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h index fa42da425ff2..d18139984b87 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -32,6 +32,7 @@ class StringRef; class SparcSubtarget : public SparcGenSubtargetInfo { Triple TargetTriple; virtual void anchor(); + bool UseSoftMulDiv; bool IsV9; bool IsLeon; bool V8DeprecatedInsts; @@ -76,6 +77,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool enableMachineScheduler() const override; + bool useSoftMulDiv() const { return UseSoftMulDiv; } bool isV9() const { return IsV9; } bool isLeon() const { return IsLeon; } bool isVIS() const { return IsVIS; } diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index ee23692ad1db..33680789ee08 100644 --- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -275,6 +275,10 @@ class SystemZOperand : public MCParsedAsmOperand { SMLoc getEndLoc() const override 
{ return EndLoc; } void print(raw_ostream &OS) const override; + /// getLocRange - Get the range between the first and last token of this + /// operand. + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + // Used by the TableGen code to add particular types of operand // to an instruction. void addRegOperands(MCInst &Inst, unsigned N) const { @@ -1164,6 +1168,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } +std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS); + bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -1209,8 +1215,13 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = SystemZMnemonicSpellCheck( + ((SystemZOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((SystemZOperand &)*Operands[0]).getLocRange()); + } } llvm_unreachable("Unexpected match type"); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td index c5faa0d62881..fda9c30fe3fc 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -187,6 +187,58 @@ def Arch11NewFeatures : SystemZFeatureList<[ FeatureVector ]>; +//===----------------------------------------------------------------------===// +// +// New features added in the Twelfth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions2 : SystemZFeature< + "miscellaneous-extensions-2", "MiscellaneousExtensions2", + "Assume that the 
miscellaneous-extensions facility 2 is installed" +>; + +def FeatureGuardedStorage : SystemZFeature< + "guarded-storage", "GuardedStorage", + "Assume that the guarded-storage facility is installed" +>; + +def FeatureMessageSecurityAssist7 : SystemZFeature< + "message-security-assist-extension7", "MessageSecurityAssist7", + "Assume that the message-security-assist extension facility 7 is installed" +>; + +def FeatureMessageSecurityAssist8 : SystemZFeature< + "message-security-assist-extension8", "MessageSecurityAssist8", + "Assume that the message-security-assist extension facility 8 is installed" +>; + +def FeatureVectorEnhancements1 : SystemZFeature< + "vector-enhancements-1", "VectorEnhancements1", + "Assume that the vector enhancements facility 1 is installed" +>; +def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">; + +def FeatureVectorPackedDecimal : SystemZFeature< + "vector-packed-decimal", "VectorPackedDecimal", + "Assume that the vector packed decimal facility is installed" +>; + +def FeatureInsertReferenceBitsMultiple : SystemZFeature< + "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", + "Assume that the insert-reference-bits-multiple facility is installed" +>; + +def Arch12NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions2, + FeatureGuardedStorage, + FeatureMessageSecurityAssist7, + FeatureMessageSecurityAssist8, + FeatureVectorEnhancements1, + FeatureVectorPackedDecimal, + FeatureInsertReferenceBitsMultiple +]>; + //===----------------------------------------------------------------------===// // // Cumulative supported and unsupported feature sets @@ -201,9 +253,13 @@ def Arch10SupportedFeatures : SystemZFeatureAdd; def Arch11SupportedFeatures : SystemZFeatureAdd; +def Arch12SupportedFeatures + : SystemZFeatureAdd; -def Arch11UnsupportedFeatures +def Arch12UnsupportedFeatures : SystemZFeatureList<[]>; +def Arch11UnsupportedFeatures + : SystemZFeatureAdd; def Arch10UnsupportedFeatures : 
SystemZFeatureAdd; def Arch9UnsupportedFeatures diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2801141cd951..2d916d2e1521 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -101,7 +101,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (Subtarget.hasVectorEnhancements1()) + addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); + else + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); @@ -316,7 +319,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::CTPOP, VT, Custom); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::CTPOP, VT, Legal); + else + setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); @@ -414,10 +420,60 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUND, MVT::v2f64, Legal); } + // The vector enhancements facility 1 has instructions for these. 
+ if (Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FABS, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMINNAN, MVT::f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); + setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f128, Legal); + setOperationAction(ISD::FMINNUM, MVT::f128, Legal); + setOperationAction(ISD::FMINNAN, MVT::f128, Legal); + } + // We have fused multiply-addition for f32 and f64 but not f128. 
setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); - setOperationAction(ISD::FMA, MVT::f128, Expand); + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FMA, MVT::f128, Legal); + else + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // We don't have a copysign instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant @@ -425,6 +481,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); + // We don't have extending load instruction on vector registers. + if (Subtarget.hasVectorEnhancements1()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + } + // Floating-point truncation and stores need to be done separately. 
setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); @@ -489,7 +551,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { case MVT::f64: return true; case MVT::f128: - return false; + return Subtarget.hasVectorEnhancements1(); default: break; } @@ -1462,21 +1524,25 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { return true; case Intrinsic::s390_vfcedbs: + case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchdbs: + case Intrinsic::s390_vfchsbs: Opcode = SystemZISD::VFCMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchedbs: + case Intrinsic::s390_vfchesbs: Opcode = SystemZISD::VFCMPHES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vftcidb: + case Intrinsic::s390_vftcisb: Opcode = SystemZISD::VFTCI; CCValid = SystemZ::CCMASK_VCMP; return true; @@ -2316,11 +2382,15 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. -static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, - EVT VT, SDValue CmpOp0, SDValue CmpOp1) { - // There is no hardware support for v4f32, so extend the vector into - // two v2f64s and compare those. - if (CmpOp0.getValueType() == MVT::v4f32) { +SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, + SDValue CmpOp1) const { + // There is no hardware support for v4f32 (unless we have the vector + // enhancements facility 1), so extend the vector into two v2f64s + // and compare those. 
+ if (CmpOp0.getValueType() == MVT::v4f32 && + !Subtarget.hasVectorEnhancements1()) { SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); @@ -2334,9 +2404,11 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. -static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) { +SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, + const SDLoc &DL, EVT VT, + ISD::CondCode CC, + SDValue CmpOp0, + SDValue CmpOp1) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); bool Invert = false; SDValue Cmp; @@ -2960,6 +3032,12 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else if (Subtarget.hasMiscellaneousExtensions2()) + // SystemZISD::SMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. 
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // @@ -4658,6 +4736,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(POPCNT); + OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); OPCODE(SDIVREM); OPCODE(UDIVREM); @@ -6118,6 +6197,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: + case SystemZ::SelectVR128: return emitSelect(MI, MBB, 0); case SystemZ::CondStore8Mux: diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 6c9c404816f0..abe8b7233e60 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -88,6 +88,7 @@ enum NodeType : unsigned { // Wrappers around the ISD opcodes of the same name. The output is GR128. // Input operands may be GR64 or GR32, depending on the instruction. + SMUL_LOHI, UMUL_LOHI, SDIVREM, UDIVREM, @@ -479,6 +480,12 @@ class SystemZTargetLowering : public TargetLowering { const SystemZSubtarget &Subtarget; // Implement LowerOperation for individual opcodes. 
+ SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, SDValue CmpOp1) const; + SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, + EVT VT, ISD::CondCode CC, + SDValue CmpOp0, SDValue CmpOp1) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 10172bd45203..02aeaadad0d9 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. -def SelectF32 : SelectWrapper; -def SelectF64 : SelectWrapper; -def SelectF128 : SelectWrapper; +def SelectF32 : SelectWrapper; +def SelectF64 : SelectWrapper; +let Predicates = [FeatureNoVectorEnhancements1] in + def SelectF128 : SelectWrapper; +let Predicates = [FeatureVectorEnhancements1] in + def SelectVR128 : SelectWrapper; defm CondStoreF32 : CondStores; @@ -69,8 +72,9 @@ let Defs = [CC], usesCustomInserter = 1 in { let Predicates = [FeatureVector] in { defm : CompareZeroFP; defm : CompareZeroFP; - defm : CompareZeroFP; } +let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in + defm : CompareZeroFP; // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; @@ -83,8 +87,12 @@ let isCodeGenOnly = 1 in { } // The sign of an FP128 is in the high register. 
-def : Pat<(fcopysign FP32:$src1, FP128:$src2), - (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))), + (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP64 result. let isCodeGenOnly = 1 in @@ -92,8 +100,12 @@ let isCodeGenOnly = 1 in def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>; // The sign of an FP128 is in the high register. -def : Pat<(fcopysign FP64:$src1, FP128:$src2), - (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>; +let Predicates = [FeatureVectorEnhancements1] in + def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))), + (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>; // fcopysign with an FP128 result. Use "upper" as the high half and leave // the low half as-is. 
@@ -101,12 +113,14 @@ class CopySign128 : Pat<(fcopysign FP128:$src1, cls:$src2), (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>; -def : CopySign128; -def : CopySign128; -def : CopySign128; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : CopySign128; + def : CopySign128; + def : CopySign128; +} defm LoadStoreF32 : MVCLoadStore; defm LoadStoreF64 : MVCLoadStore; @@ -166,20 +180,32 @@ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>, def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>, Requires<[FeatureFPExtension]>; -def : Pat<(f32 (fpround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; -def : Pat<(f64 (fpround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f32 (fpround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; + def : Pat<(f64 (fpround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; +} // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>; + def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>; +} // Extend memory floating-point values to wider representations. 
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; +def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; +let Predicates = [FeatureNoVectorEnhancements1] in { + def : Pat<(f128 (extloadf32 bdxaddr12only:$src)), + (LXEB bdxaddr12only:$src)>; + def : Pat<(f128 (extloadf64 bdxaddr12only:$src)), + (LXDB bdxaddr12only:$src)>; +} // Convert a signed integer register value to a floating-point one. def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; @@ -426,16 +452,18 @@ def : Pat<(fmul (f64 (fpextend FP32:$src1)), // f128 multiplication of two FP64 registers. def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), - (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), - FP64:$src1, subreg_h64), FP64:$src2)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_h64), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; -def : Pat<(fmul (f128 (fpextend FP64:$src1)), - (f128 (extloadf64 bdxaddr12only:$addr))), - (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), - bdxaddr12only:$addr)>; +let Predicates = [FeatureNoVectorEnhancements1] in + def : Pat<(fmul (f128 (fpextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64), + bdxaddr12only:$addr)>; // Fused multiply-add. 
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 7620e06ccbc9..033a0a879d37 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1091,6 +1091,94 @@ class InstVRIe op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRIf op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M5; + let Inst{19-12} = I4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIg op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<8> I3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = I4; + let Inst{23-20} = M5; + let Inst{19-12} = I3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIh op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = I3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIi op, dag outs, dag ins, string 
asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-24} = 0; + let Inst{23-20} = M4; + let Inst{19-12} = I3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + // Depending on the instruction mnemonic, certain bits may be or-ed into // the M4 value provided as explicit operand. These are passed as m4or. class InstVRRa op, dag outs, dag ins, string asmstr, list pattern, @@ -1259,6 +1347,67 @@ class InstVRRf op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRRg op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRh op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = V1{3-0}; + let Inst{31-28} = V2{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M3; + let Inst{19-12} = 0; + let Inst{11} = 0; + let Inst{10} = V1{4}; + let Inst{9} = V2{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRi op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<5> V2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M3; 
+ let Inst{19-12} = 0; + let Inst{11} = 0; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstVRSa op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1321,6 +1470,25 @@ class InstVRSc op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVRSd op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = 0; + let Inst{35-32} = R3; + let Inst{31-16} = BD2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + class InstVRV op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1358,6 +1526,24 @@ class InstVRX op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstVSI op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<8> I3; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I3; + let Inst{31-16} = BD2; + let Inst{15-12} = V1{3-0}; + let Inst{11-9} = 0; + let Inst{8} = V1{4}; + let Inst{7-0} = op{7-0}; +} + //===----------------------------------------------------------------------===// // Instruction classes for .insn directives //===----------------------------------------------------------------------===// @@ -1910,6 +2096,25 @@ class FixedCondBranchRX opcode> let M1 = V.ccmask; } +class CondBranchRXY opcode> + : InstRXYb { + let CCMaskFirst = 1; +} + +class AsmCondBranchRXY opcode> + : InstRXYb; + +class FixedCondBranchRXY opcode, + SDPatternOperator operator = null_frag> + : InstRXYb { + let isAsmParserOnly = V.alternate; + let M1 = 
V.ccmask; +} + class CmpBranchRIEa opcode, RegisterOperand cls, Immediate imm> : InstRIEa opcode, let AccessBytes = bytes; } +class StoreLengthVRSd opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVRSd { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreLengthVSI opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVSI { + let mayStore = 1; + let AccessBytes = bytes; +} + class StoreMultipleRS opcode, RegisterOperand cls, AddressingMode mode = bdaddr12only> : InstRSa opcode, : InstRXa; +class SideEffectBinaryRXY opcode, + RegisterOperand cls> + : InstRXYa; + class SideEffectBinaryRILPC opcode, RegisterOperand cls> : InstRILb opcode> (ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5), mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>; +class BinaryVRIh opcode> + : InstVRIh; + class BinaryVRRa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0> : InstVRRa opcode, SDPatternOperator operator, mnemonic#"\t$V1, $R2, $R3", [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>; +class BinaryVRRi opcode, RegisterOperand cls> + : InstVRRi; + class BinaryVRSa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type> : InstVRSa opcode> (ins VR128:$V3, shift12only:$BD2, imm32zx4: $M4), mnemonic#"\t$R1, $V3, $BD2, $M4", []>; +class BinaryVRSd opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSd { + let mayLoad = 1; + let AccessBytes = bytes; +} + class BinaryVRX opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes> : InstVRX opcode, RegisterOperand cls> let mayStore = 1; } +class BinaryVSI opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVSI { + let mayLoad = 1; + let AccessBytes = bytes; +} + class StoreBinaryVRV opcode, bits<5> bytes, Immediate index> : InstVRV opcode> let M5 = 0; } +class CompareVRRh opcode> + : InstVRRh { + let isCompare = 1; +} + class TestRXE opcode, SDPatternOperator operator, RegisterOperand cls> : InstRXE opcode> let 
mayLoad = 1; } +class TestVRRg opcode> + : InstVRRg; + class SideEffectTernarySSc opcode> : InstSSc opcode, SDPatternOperator operator, let M5 = type; } +class TernaryVRIi opcode, RegisterOperand cls> + : InstVRIi; + class TernaryVRRa opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> : InstVRRa opcode, SDPatternOperator operator, let M6 = 0; } +class TernaryVRRcFloat opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + bits<4> type = 0, bits<4> m5 = 0> + : InstVRRc { + let M4 = type; + let M5 = m5; +} + +class TernaryVRRcFloatGeneric opcode> + : InstVRRc; + class TernaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type = 0> : InstVRRd opcode> let DisableEncoding = "$V1src"; } +class QuaternaryVRIf opcode> + : InstVRIf; + +class QuaternaryVRIg opcode> + : InstVRIg; + class QuaternaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, - bits<4> type, SDPatternOperator m6mask, bits<4> m6or> + TypedReg tr3, TypedReg tr4, bits<4> type, + SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0> : InstVRRd { let M5 = type; } +class QuaternaryVRRdGeneric opcode> + : InstVRRd; + // Declare a pair of instructions, one which sets CC and one which doesn't. // The CC-setting form ends with "S" and sets the low bit of M6. // Also create aliases to make use of M6 operand optional in assembler. 
@@ -4041,13 +4348,15 @@ multiclass QuaternaryOptVRRdSPair opcode, SDPatternOperator operator_cc, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> modifier = 0> { - def "" : QuaternaryVRRd; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in - def S : QuaternaryVRRd; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, @@ -4055,10 +4364,7 @@ multiclass QuaternaryOptVRRdSPair opcode, } multiclass QuaternaryOptVRRdSPairGeneric opcode> { - def "" : InstVRRd; + def "" : QuaternaryVRRdGeneric; def : InstAlias(NAME) VR128:$V1, VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, 0)>; @@ -4366,10 +4672,10 @@ class RotateSelectRIEfPseudo // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. -class SelectWrapper +class SelectWrapper : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 66a5ff12be46..4533f4fdf21a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -869,6 +869,37 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Move 128-bit floating-point values between VR128 and FP128. 
+ if (SystemZ::VR128BitRegClass.contains(DestReg) && + SystemZ::FP128BitRegClass.contains(SrcReg)) { + unsigned SrcRegHi = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned SrcRegLo = + RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) + .addReg(SrcRegHi, getKillRegState(KillSrc)) + .addReg(SrcRegLo, getKillRegState(KillSrc)); + return; + } + if (SystemZ::FP128BitRegClass.contains(DestReg) && + SystemZ::VR128BitRegClass.contains(SrcReg)) { + unsigned DestRegHi = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + unsigned DestRegLo = + RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64), + SystemZ::subreg_r64, &SystemZ::VR128BitRegClass); + + if (DestRegHi != SrcReg) + copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); + BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1); + return; + } + // Everything else needs only one instruction. 
unsigned Opcode; if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) @@ -1434,6 +1465,7 @@ SystemZII::Branch SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const { switch (MI.getOpcode()) { case SystemZ::BR: + case SystemZ::BI: case SystemZ::J: case SystemZ::JG: return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 4569be7602e4..f64c0d15ef83 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -48,6 +48,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { let isIndirectBranch = 1 in { def BC : CondBranchRX<"b#", 0x47>; def BCR : CondBranchRR<"b#r", 0x07>; + def BIC : CondBranchRXY<"bi#", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; } } @@ -58,6 +60,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { let isIndirectBranch = 1 in { def BCAsm : AsmCondBranchRX<"bc", 0x47>; def BCRAsm : AsmCondBranchRR<"bcr", 0x07>; + def BICAsm : AsmCondBranchRXY<"bic", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; } // Define AsmParser extended mnemonics for each general condition-code mask @@ -69,6 +73,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { let isIndirectBranch = 1 in { def BAsm#V : FixedCondBranchRX , "b#", 0x47>; def BRAsm#V : FixedCondBranchRR , "b#r", 0x07>; + def BIAsm#V : FixedCondBranchRXY, "bi#", 0xe347>, + Requires<[FeatureMiscellaneousExtensions2]>; } } } @@ -81,6 +87,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isIndirectBranch = 1 in { def B : FixedCondBranchRX; def BR : FixedCondBranchRR; + def BI : FixedCondBranchRXY, + Requires<[FeatureMiscellaneousExtensions2]>; } } @@ -316,9 +324,9 @@ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in { // Select instructions //===----------------------------------------------------------------------===// -def Select32Mux : SelectWrapper, 
Requires<[FeatureHighWord]>; -def Select32 : SelectWrapper; -def Select64 : SelectWrapper; +def Select32Mux : SelectWrapper, Requires<[FeatureHighWord]>; +def Select32 : SelectWrapper; +def Select64 : SelectWrapper; // We don't define 32-bit Mux stores if we don't have STOCFH, because the // low-only STOC should then always be used if possible. @@ -921,6 +929,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of memory. defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>; defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>; + def AGH : BinaryRXY<"agh", 0xE338, add, GR64, asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>; def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>; @@ -1006,6 +1016,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGH : BinaryRXY<"sgh", 0xE339, sub, GR64, asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } @@ -1207,6 +1219,15 @@ defm : RMWIByte; // Multiplication //===----------------------------------------------------------------------===// +// Multiplication of a register, setting the condition code. We prefer these +// over MS(G)R if available, even though we cannot use the condition code, +// since they are three-operand instructions. +let Predicates = [FeatureMiscellaneousExtensions2], + Defs = [CC], isCommutable = 1 in { + def MSRKC : BinaryRRFa<"msrkc", 0xB9FD, mul, GR32, GR32, GR32>; + def MSGRKC : BinaryRRFa<"msgrkc", 0xB9ED, mul, GR64, GR64, GR64>; +} + // Multiplication of a register. 
let isCommutable = 1 in { def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; @@ -1226,21 +1247,37 @@ def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, asextloadi16, 2>, + Requires<[FeatureMiscellaneousExtensions2]>; def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; +// Multiplication of memory, setting the condition code. +let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { + def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>; + def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>; +} + // Multiplication of a register, producing two results. -def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>, + Requires<[FeatureMiscellaneousExtensions2]>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; +def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2), + (MGRK GR64:$src1, GR64:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; // Multiplication of memory, producing two results. 
def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; +def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>, + Requires<[FeatureMiscellaneousExtensions2]>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; +def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; @@ -1765,8 +1802,29 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { GR128, GR128, GR128>; def PCC : SideEffectInherentRRE<"pcc", 0xB92C>; } + let Predicates = [FeatureMessageSecurityAssist5] in - def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; + def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; + let Predicates = [FeatureMessageSecurityAssist7], isAsmParserOnly = 1 in + def PRNO : SideEffectBinaryMemMemRRE<"prno", 0xB93C, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist8] in + def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929, + GR128, GR128, GR128>; +} + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureGuardedStorage] in { + def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>; + def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>; + + let mayLoad = 1 in + def LGSC : SideEffectBinaryRXY<"lgsc", 0xE34D, GR64>; + let mayStore = 1 in + def STGSC : SideEffectBinaryRXY<"stgsc", 0xE349, GR64>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td index a9803c2d83e9..0112ebf1eb10 100644 
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrSystem.td @@ -126,6 +126,10 @@ let hasSideEffects = 1, Defs = [CC] in let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>; +// Insert reference bits multiple. +let Predicates = [FeatureInsertReferenceBitsMultiple], hasSideEffects = 1 in + def IRBM : UnaryRRE<"irbm", 0xB9AC, null_frag, GR64, GR64>; + // Perform frame management function. let hasSideEffects = 1 in def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 0158fe6aec08..c9a02d9c8082 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -14,7 +14,7 @@ let Predicates = [FeatureVector] in { // Register move. def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; - def VLR32 : UnaryAliasVRR; + def VLR32 : UnaryAliasVRR; def VLR64 : UnaryAliasVRR; // Load GR from VR element. @@ -141,7 +141,7 @@ let Predicates = [FeatureVector] in { // LEY and LDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VL32 : UnaryAliasVRX; + def VL32 : UnaryAliasVRX; def VL64 : UnaryAliasVRX; // Load logical element and zero. @@ -154,6 +154,11 @@ let Predicates = [FeatureVector] in { (VLLEZF bdxaddr12only:$addr)>; def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), (VLLEZG bdxaddr12only:$addr)>; + let Predicates = [FeatureVectorEnhancements1] in { + def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>; + def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)), + (VLLEZLF bdxaddr12only:$addr)>; + } // Load element. 
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; @@ -170,6 +175,13 @@ let Predicates = [FeatureVector] in { def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>; } +let Predicates = [FeatureVectorPackedDecimal] in { + // Load rightmost with length. The number of loaded bytes is only known + // at run time. + def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>; + def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>; +} + // Use replicating loads if we're inserting a single element into an // undefined vector. This avoids a false dependency on the previous // register contents. @@ -219,7 +231,7 @@ let Predicates = [FeatureVector] in { // STEY and STDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VST32 : StoreAliasVRX; + def VST32 : StoreAliasVRX; def VST64 : StoreAliasVRX; // Scatter element. @@ -227,6 +239,13 @@ let Predicates = [FeatureVector] in { def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>; } +let Predicates = [FeatureVectorPackedDecimal] in { + // Store rightmost with length. The number of stored bytes is only known + // at run time. + def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>; + def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>; +} + //===----------------------------------------------------------------------===// // Selects and permutes //===----------------------------------------------------------------------===// @@ -256,6 +275,10 @@ let Predicates = [FeatureVector] in { // Permute doubleword immediate. def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; + // Bit Permute. + let Predicates = [FeatureVectorEnhancements1] in + def VBPERM : BinaryVRRc<"vbperm", 0xE785, int_s390_vbperm, v128g, v128b>; + // Replicate. 
def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>; def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; @@ -424,6 +447,10 @@ let Predicates = [FeatureVector] in { def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; + // Not exclusive or. + let Predicates = [FeatureVectorEnhancements1] in + def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>; + // Exclusive or. def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; @@ -567,6 +594,17 @@ let Predicates = [FeatureVector] in { def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + // Multiply sum logical. + let Predicates = [FeatureVectorEnhancements1] in { + def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>; + def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg, + v128q, v128g, v128g, v128q, 3>; + } + + // Nand. + let Predicates = [FeatureVectorEnhancements1] in + def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>; + // Nor. def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>; @@ -574,9 +612,19 @@ let Predicates = [FeatureVector] in { // Or. def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; + // Or with complement. + let Predicates = [FeatureVectorEnhancements1] in + def VOC : BinaryVRRc<"voc", 0xE76F, null_frag, v128any, v128any>; + // Population count. 
def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>; def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; + let Predicates = [FeatureVectorEnhancements1] in { + def VPOPCTB : UnaryVRRa<"vpopctb", 0xE750, ctpop, v128b, v128b, 0>; + def VPOPCTH : UnaryVRRa<"vpopcth", 0xE750, ctpop, v128h, v128h, 1>; + def VPOPCTF : UnaryVRRa<"vpopctf", 0xE750, ctpop, v128f, v128f, 2>; + def VPOPCTG : UnaryVRRa<"vpopctg", 0xE750, ctpop, v128g, v128g, 3>; + } // Element rotate left logical (with vector shift amount). def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>; @@ -724,6 +772,14 @@ multiclass BitwiseVectorOps { (VNO VR128:$x, VR128:$y)>; def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>; } + let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(type (z_vnot (xor VR128:$x, VR128:$y))), + (VNX VR128:$x, VR128:$y)>; + def : Pat<(type (z_vnot (and VR128:$x, VR128:$y))), + (VNN VR128:$x, VR128:$y)>; + def : Pat<(type (or VR128:$x, (z_vnot VR128:$y))), + (VOC VR128:$x, VR128:$y)>; + } } defm : BitwiseVectorOps; @@ -879,6 +935,11 @@ let Predicates = [FeatureVector] in { def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; + def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>; + } // Convert from fixed 64-bit. 
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; @@ -910,6 +971,11 @@ let Predicates = [FeatureVector] in { def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; + def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>; + } // Load FP integer. def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; @@ -917,66 +983,213 @@ let Predicates = [FeatureVector] in { def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; defm : VectorRounding; defm : VectorRounding; + let Predicates = [FeatureVectorEnhancements1] in { + def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; + def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>; + defm : VectorRounding; + defm : VectorRounding; + defm : VectorRounding; + } // Load lengthened. 
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; - def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + let isAsmParserOnly = 1 in { + def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; + def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; + def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + } + def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>; + def : Pat<(f128 (fpextend (f32 VR32:$src))), + (WFLLD (WLDEB VR32:$src))>; + } - // Load rounded, + // Load rounded. def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; - def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; - def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; - def : FPConversion; + def : FPConversion; + let Predicates = [FeatureVectorEnhancements1] in { + let isAsmParserOnly = 1 in { + def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; + def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>; + def : FPConversion; + def : Pat<(f32 (fpround (f128 VR128:$src))), + (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>; + } + + // Maximum. 
+ multiclass VectorMax { + def : FPMinMax; + def : FPMinMax; + } + let Predicates = [FeatureVectorEnhancements1] in { + def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>; + def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb, + v128db, v128db, 3, 0>; + def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, + v64db, v64db, 3, 8>; + def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, + v128sb, v128sb, 2, 0>; + def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, + v32sb, v32sb, 2, 8>; + def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag, + v128xb, v128xb, 4, 8>; + defm : VectorMax; + defm : VectorMax; + defm : VectorMax; + defm : VectorMax; + defm : VectorMax; + } + + // Minimum. + multiclass VectorMin { + def : FPMinMax; + def : FPMinMax; + } + let Predicates = [FeatureVectorEnhancements1] in { + def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>; + def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb, + v128db, v128db, 3, 0>; + def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, + v64db, v64db, 3, 8>; + def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, + v128sb, v128sb, 2, 0>; + def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, + v32sb, v32sb, 2, 8>; + def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag, + v128xb, v128xb, 4, 8>; + defm : VectorMin; + defm : VectorMin; + defm : VectorMin; + defm : VectorMin; + defm : VectorMin; + } // Multiply. def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; + def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>; + } // Multiply and add. 
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; + def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>; + } // Multiply and subtract. def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; + def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>; + } + + // Negative multiply and add. + let Predicates = [FeatureVectorEnhancements1] in { + def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; + def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; + def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; + def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>; + } + + // Negative multiply and subtract. 
+ let Predicates = [FeatureVectorEnhancements1] in { + def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; + def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; + def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; + def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>; + } // Perform sign operation. def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>; def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>; def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; + def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>; + } // Load complement. def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; + def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>; + } // Load negative. def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; + def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>; + } // Load positive. 
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; + def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>; + } // Square root. def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; + def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>; + } // Subtract. def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; + def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>; + } // Test data class immediate. 
let Defs = [CC] in { def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>; def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; + def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>; + } } } @@ -989,12 +1202,20 @@ let Predicates = [FeatureVector] in { let Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; + def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>; + } } // Compare and signal scalar. let Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; + def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>; + } } // Compare equal. @@ -1003,6 +1224,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal equal. 
+ let Predicates = [FeatureVectorEnhancements1] in { + defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } // Compare high. def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>; @@ -1010,6 +1253,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal high. + let Predicates = [FeatureVectorEnhancements1] in { + defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } // Compare high or equal. 
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>; @@ -1017,6 +1282,28 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 8>; + } + + // Compare and signal high or equal. + let Predicates = [FeatureVectorEnhancements1] in { + defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag, + v128g, v128db, 3, 4>; + defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 12>; + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 12>; + defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag, + v128q, v128xb, 4, 12>; + } } //===----------------------------------------------------------------------===// @@ -1028,36 +1315,49 @@ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; 
+def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), 
(f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>; //===----------------------------------------------------------------------===// // Replicating scalars @@ -1133,6 +1433,20 @@ let AddedComplexity = 4 in { (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; } +//===----------------------------------------------------------------------===// +// Support for 128-bit floating-point values in vector registers +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements1] in { + def : Pat<(f128 (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + + def : Pat<(f128 fpimm0), (VZERO)>; + def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>; +} + //===----------------------------------------------------------------------===// // String instructions //===----------------------------------------------------------------------===// @@ -1202,3 +1516,37 @@ let Predicates = [FeatureVector] in { defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, z_vstrcz_cc, v128f, v128f, 2, 2>; } + +//===----------------------------------------------------------------------===// +// Packed-decimal instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorPackedDecimal] in { + def VLIP : BinaryVRIh<"vlip", 0xE649>; + + def VPKZ : BinaryVSI<"vpkz", 0xE634, null_frag, 0>; + def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>; + + let Defs = [CC] in { + def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>; + def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>; + def VCVDG : TernaryVRIi<"vcvdg", 0xE65A, GR64>; 
+ + def VAP : QuaternaryVRIf<"vap", 0xE671>; + def VSP : QuaternaryVRIf<"vsp", 0xE673>; + + def VMP : QuaternaryVRIf<"vmp", 0xE678>; + def VMSP : QuaternaryVRIf<"vmsp", 0xE679>; + + def VDP : QuaternaryVRIf<"vdp", 0xE67A>; + def VRP : QuaternaryVRIf<"vrp", 0xE67B>; + def VSDP : QuaternaryVRIf<"vsdp", 0xE67E>; + + def VSRP : QuaternaryVRIg<"vsrp", 0xE659>; + def VPSOP : QuaternaryVRIg<"vpsop", 0xE65B>; + + def VTP : TestVRRg<"vtp", 0xE65F>; + def VCP : CompareVRRh<"vcp", 0xE677>; + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td index 9c6d5819f8a7..759a8bb0ce14 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -181,6 +181,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, [SDNPInGlue]>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; +def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; @@ -549,6 +550,12 @@ def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), (fma node:$src2, node:$src3, (fneg node:$src1))>; +// Negative fused multiply-add and multiply-subtract. +def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (fma node:$src1, node:$src2, node:$src3))>; +def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fneg (fms node:$src1, node:$src2, node:$src3))>; + // Floating-point negative absolute. 
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; @@ -624,6 +631,19 @@ def z_vllezf64 : PatFrag<(ops node:$addr), (scalar_to_vector (f64 (load node:$addr))), (z_vzero))>; +// Similarly for the high element of a zeroed vector. +def z_vllezli32 : z_vllez; +def z_vllezlf32 : PatFrag<(ops node:$addr), + (bitconvert + (z_merge_high + (v2i64 + (bitconvert + (z_merge_high + (v4f32 (scalar_to_vector + (f32 (load node:$addr)))), + (v4f32 (z_vzero))))), + (v2i64 (z_vzero))))>; + // Store one element of a vector. class z_vste : PatFrag<(ops node:$vec, node:$addr, node:$index), diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td index 16a7ed784d70..152521fb66a8 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -167,3 +167,10 @@ class FPConversion suppress, bits<4> mode> : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), (insn tr2.op:$vec, suppress, mode)>; + +// Use INSN to perform minimum/maximum operation OPERATOR on type TR. +// FUNCTION is the type of minimum/maximum function to perform.
+class FPMinMax function> + : Pat<(tr.vt (operator (tr.vt tr.op:$vec1), (tr.vt tr.op:$vec2))), + (insn tr.op:$vec1, tr.op:$vec2, function)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td index 1cdc0949ff4a..0dca4582dc0d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -33,3 +33,6 @@ def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>; def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>; def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>; +def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; +def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index 36809ea81dc1..52ba1a584017 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -260,10 +260,10 @@ defm VF128 : SystemZRegClass<"VF128", // All vector registers. defm VR128 : SystemZRegClass<"VR128", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, - (add (sequence "V%u", 0, 7), - (sequence "V%u", 16, 31), - (sequence "V%u", 8, 15))>; + [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; // Attaches a ValueType to a register operand, to make the instruction // definitions easier. 
@@ -272,7 +272,8 @@ class TypedReg { RegisterOperand op = opin; } -def v32eb : TypedReg; +def v32f : TypedReg; +def v32sb : TypedReg; def v64g : TypedReg; def v64db : TypedReg; def v128b : TypedReg; @@ -280,8 +281,9 @@ def v128h : TypedReg; def v128f : TypedReg; def v128g : TypedReg; def v128q : TypedReg; -def v128eb : TypedReg; +def v128sb : TypedReg; def v128db : TypedReg; +def v128xb : TypedReg; def v128any : TypedReg; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td index 1ce0168f95e9..8dba89f70a42 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -59,7 +59,7 @@ def FPU2 : SchedWrite; def DFU : SchedWrite; def DFU2 : SchedWrite; -// Vector sub units (z13) +// Vector sub units (z13 and later) def VecBF : SchedWrite; def VecBF2 : SchedWrite; def VecDF : SchedWrite; @@ -75,6 +75,7 @@ def VecXsPm : SchedWrite; def VBU : SchedWrite; +include "SystemZScheduleZ14.td" include "SystemZScheduleZ13.td" include "SystemZScheduleZEC12.td" include "SystemZScheduleZ196.td" diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td new file mode 100644 index 000000000000..f11177af91a5 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -0,0 +1,1611 @@ +//-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z14 to support instruction +// scheduling and other instruction cost heuristics. 
+// +//===----------------------------------------------------------------------===// + +def Z14Model : SchedMachineModel { + + let UnsupportedFeatures = Arch12UnsupportedFeatures.List; + + let IssueWidth = 8; + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z14Model in { + +// These definitions could be put in a subtarget common include file, +// but it seems the include system in Tablegen currently rejects +// multiple includes of same file. +def : WriteRes { + let NumMicroOps = 0; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 0; + let BeginGroup = 1; +} +def : WriteRes { + let NumMicroOps = 0; + let EndGroup = 1; +} +def : WriteRes { let Latency = 2; let NumMicroOps = 0;} +def : WriteRes { let Latency = 3; let NumMicroOps = 0;} +def : WriteRes { let Latency = 4; let NumMicroOps = 0;} +def : WriteRes { let Latency = 5; let NumMicroOps = 0;} +def : WriteRes { let Latency = 6; let NumMicroOps = 0;} +def : WriteRes { let Latency = 7; let NumMicroOps = 0;} +def : WriteRes { let Latency = 8; let NumMicroOps = 0;} +def : WriteRes { let Latency = 9; let NumMicroOps = 0;} +def : WriteRes { let Latency = 10; let NumMicroOps = 0;} +def : WriteRes { let Latency = 11; let NumMicroOps = 0;} +def : WriteRes { let Latency = 12; let NumMicroOps = 0;} +def : WriteRes { let Latency = 15; let NumMicroOps = 0;} +def : WriteRes { let Latency = 20; let NumMicroOps = 0;} +def : WriteRes { let Latency = 30; let NumMicroOps = 0;} + +// Execution units. +def Z14_FXaUnit : ProcResource<2>; +def Z14_FXbUnit : ProcResource<2>; +def Z14_LSUnit : ProcResource<2>; +def Z14_VecUnit : ProcResource<2>; +def Z14_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z14_VBUnit : ProcResource<2>; + +// Subtarget specific definitions of scheduling resources. 
+def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 9; } +def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 9; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes + { let Latency = 30; } +def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } +def : WriteRes; // Virtual Branching Unit + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[FXa, FXa, FXb, FXb, Lat4, GroupAlone], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[FXb], 
(instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[FXb, FXb, Lat2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[VBU], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[FXb], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[FXb], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[VBU, FXa, FXa, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BRASL$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BAS(R)?$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[FXb, EndGroup], (instregex "Return$")>; +def : InstRW<[FXb], (instregex "CondReturn$")>; + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + +// Select pseudo +def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; + +// CondStore pseudos +def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>; + 
+//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[FXb, LSU, Lat5], (instregex "MV(G|H)?HI$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>; + +// Pseudo -> reg move +def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[FXa], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[FXa], (instregex "INSERT_SUBREG$")>; +def : InstRW<[FXa], (instregex "REG_SEQUENCE$")>; +def : InstRW<[FXa], (instregex "SUBREG_TO_REG$")>; + +// Loads +def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>; +def : InstRW<[LSU], (instregex "LG(RL)?$")>; +def : InstRW<[LSU], (instregex "L128$")>; + +def : InstRW<[FXa], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[FXa], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[FXa], (instregex "LG(F|H)I$")>; +def : InstRW<[FXa], (instregex "LHI(Mux)?$")>; +def : InstRW<[FXa], (instregex "LR(Mux)?$")>; + +// Load and zero rightmost byte +def : InstRW<[LSU], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>; +def : InstRW<[FXa], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[FXb, LSU, Lat5], (instregex "STG(RL)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "ST128$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "L(B|H|G)R$")>; +def : InstRW<[FXa], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[FXa, LSU, Lat5], (instregex "LTGF$")>; +def : InstRW<[FXa], (instregex "LTGFR$")>; + +def : InstRW<[FXa, LSU, Lat5], (instregex "LB(H|Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LH(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LG(B|H|F)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>; +def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>; +def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSU], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSU], (instregex "LLH(Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>; +def : InstRW<[LSU], (instregex "LLHRL$")>; +def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSU], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[FXb, LSU, 
Lat5], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], + (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; + +// Store multiple (estimated average of ceil(5/2) FXb ops) +def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, + GroupAlone], (instregex "STM(G|H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LRV(G)?R$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[FXa], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "LP(G)?R$")>; +def : InstRW<[FXa, 
FXa, Lat2, BeginGroup], (instregex "L(N|P)GFR$")>; +def : InstRW<[FXa], (instregex "LN(R|GR)$")>; +def : InstRW<[FXa], (instregex "LC(R|GR)$")>; +def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "IC(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "IC32(Y)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "ICM(H|Y)?$")>; +def : InstRW<[FXa], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[FXa], (instregex "IIHF(64)?$")>; +def : InstRW<[FXa], (instregex "IIHH(64)?$")>; +def : InstRW<[FXa], (instregex "IIHL(64)?$")>; +def : InstRW<[FXa], (instregex "IILF(64)?$")>; +def : InstRW<[FXa], (instregex "IILH(64)?$")>; +def : InstRW<[FXa], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "AH(Y)?$")>; +def : InstRW<[FXa], (instregex "AIH$")>; +def : InstRW<[FXa], (instregex "AFI(Mux)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "AG$")>; +def : InstRW<[FXa], (instregex "AGFI$")>; +def : InstRW<[FXa], (instregex "AGHI(K)?$")>; +def : InstRW<[FXa], (instregex "AGR(K)?$")>; +def : InstRW<[FXa], (instregex "AHI(K)?$")>; +def : InstRW<[FXa], (instregex "AHIMux(K)?$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "AL(Y)?$")>; +def : InstRW<[FXa], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "ALG(F)?$")>; +def : InstRW<[FXa], (instregex "ALGHSIK$")>; +def : InstRW<[FXa], (instregex "ALGF(I|R)$")>; +def : InstRW<[FXa], (instregex "ALGR(K)?$")>; +def : InstRW<[FXa], (instregex "ALR(K)?$")>; +def : InstRW<[FXa], (instregex "AR(K)?$")>; +def : InstRW<[FXa], (instregex 
"A(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXa], (instregex "ALSIH(N)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>; +def : InstRW<[FXa, Lat2, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[FXa, LSU, Lat6], (instregex "AG(F|H)$")>; +def : InstRW<[FXa, Lat2], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "S(G|Y)?$")>; +def : InstRW<[FXa, LSU, Lat6], (instregex "SH(Y)?$")>; +def : InstRW<[FXa], (instregex "SGR(K)?$")>; +def : InstRW<[FXa], (instregex "SLFI$")>; +def : InstRW<[FXa, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[FXa], (instregex "SLGF(I|R)$")>; +def : InstRW<[FXa], (instregex "SLGR(K)?$")>; +def : InstRW<[FXa], (instregex "SLR(K)?$")>; +def : InstRW<[FXa], (instregex "SR(K)?$")>; +def : InstRW<[FXa], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>; +def : InstRW<[FXa, Lat2, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[FXa, LSU, Lat6], (instregex "SG(F|H)$")>; +def : InstRW<[FXa, Lat2], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "N(G|Y)?$")>; +def : InstRW<[FXa], (instregex "NGR(K)?$")>; +def : InstRW<[FXa], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "NI(Y)?$")>; +def : InstRW<[FXa], (instregex "NIHF(64)?$")>; +def 
: InstRW<[FXa], (instregex "NIHH(64)?$")>; +def : InstRW<[FXa], (instregex "NIHL(64)?$")>; +def : InstRW<[FXa], (instregex "NILF(64)?$")>; +def : InstRW<[FXa], (instregex "NILH(64)?$")>; +def : InstRW<[FXa], (instregex "NILL(64)?$")>; +def : InstRW<[FXa], (instregex "NR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "O(G|Y)?$")>; +def : InstRW<[FXa], (instregex "OGR(K)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "OI(Y)?$")>; +def : InstRW<[FXa], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[FXa], (instregex "OIHF(64)?$")>; +def : InstRW<[FXa], (instregex "OIHH(64)?$")>; +def : InstRW<[FXa], (instregex "OIHL(64)?$")>; +def : InstRW<[FXa], (instregex "OILF(64)?$")>; +def : InstRW<[FXa], (instregex "OILH(64)?$")>; +def : InstRW<[FXa], (instregex "OILL(64)?$")>; +def : InstRW<[FXa], (instregex "OR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat5], (instregex "X(G|Y)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "XI(Y)?$")>; +def : InstRW<[FXa], (instregex "XIFMux$")>; +def : InstRW<[FXa], (instregex "XGR(K)?$")>; +def : InstRW<[FXa], (instregex "XIHF(64)?$")>; +def : InstRW<[FXa], (instregex "XILF(64)?$")>; +def : InstRW<[FXa], (instregex "XR(K)?$")>; +def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat9], (instregex "MS(GF|Y)?$")>; +def : InstRW<[FXa, 
Lat5], (instregex "MS(R|FI)$")>; +def : InstRW<[FXa, LSU, Lat11], (instregex "MSG$")>; +def : InstRW<[FXa, Lat7], (instregex "MSGR$")>; +def : InstRW<[FXa, Lat5], (instregex "MSGF(I|R)$")>; +def : InstRW<[FXa2, LSU, Lat12, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXa2, Lat8, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXa, Lat4], (instregex "MGHI$")>; +def : InstRW<[FXa, Lat4], (instregex "MHI$")>; +def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>; +def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>; +def : InstRW<[FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>; +def : InstRW<[FXa, Lat8, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[FXa, LSU, Lat9, GroupAlone], (instregex "MSC$")>; +def : InstRW<[FXa, LSU, Lat11, GroupAlone], (instregex "MSGC$")>; +def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>; +def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; +def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; +def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>; +def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; +def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; +def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; +def : InstRW<[FXa], 
(instregex "SRA(G|K)?$")>; +def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; +def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[FXa], (instregex "RISBG(N|32)?$")>; +def : InstRW<[FXa], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[FXa], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[FXa], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>; +def : InstRW<[FXb], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[FXb], (instregex "CG(F|H)I$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CG(HSI|RL)$")>; +def : InstRW<[FXb], (instregex "C(G)?R$")>; +def : InstRW<[FXb], (instregex "CIH$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CH(F|SI)$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>; +def : InstRW<[FXb], (instregex "CLFI(Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLGF(RL)?$")>; +def : InstRW<[FXb], (instregex "CLGF(I|R)$")>; +def : InstRW<[FXb], (instregex "CLGR$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLGRL$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>; +def : InstRW<[FXb], (instregex "CLIH$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>; +def : InstRW<[FXb], (instregex "CLR$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXb], (instregex "C(L)?HHR$")>; +def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>; +def : InstRW<[FXb, LSU, Lat6], (instregex "CGH(RL)?$")>; +def : 
InstRW<[FXa, FXb, LSU, Lat6, BeginGroup], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[FXb, LSU, Lat6], (instregex "CGF(RL)?$")>; +def : InstRW<[FXb, Lat2], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[FXb, LSU, Lat5], (instregex "TM(Y)?$")>; +def : InstRW<[FXb], (instregex "TM(H|L)Mux$")>; +def : InstRW<[FXb], (instregex "TMHH(64)?$")>; +def : InstRW<[FXb], (instregex "TMHL(64)?$")>; +def : InstRW<[FXb], (instregex "TMLH(64)?$")>; +def : InstRW<[FXb], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "PFD(RL)?$")>; +def : InstRW<[FXb, Lat2], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[FXb], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[FXb, LSU, Lat5], (instregex "LAA(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAAL(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAN(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAO(G)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[FXb, LSU, Lat5, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[FXa, FXb, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>; + +// Compare double 
and swap +def : InstRW<[FXa, FXa, FXb, FXb, FXa, LSU, Lat10, GroupAlone], + (instregex "CDS(Y)?$")>; +def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone], + (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC)$")>; +def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "LGG$")>; +def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>; +def : InstRW<[LSU, 
Lat30, GroupAlone], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone], + (instregex "CVBG$")>; +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>; + +def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "(M|D)P$")>; +def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone], + (instregex "SRP$")>; +def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; +def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[LSU, FXa, Lat5, BeginGroup], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program 
mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[FXa, Lat3, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[FXa, FXa, FXb, Lat5, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[FXb], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[FXb, Lat2, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[FXa, FXb, Lat2, GroupAlone], (instregex "BSM$")>; +def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat15, GroupAlone], + (instregex "TBEGIN(C|_nofloat)?$")>; + +// Transaction end +def : InstRW<[FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[FXa], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. 
+//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[FXa, FXa, Lat4, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>; + +// Extend +def : InstRW<[FXa], (instregex "AEXT128$")>; +def : InstRW<[FXa], (instregex "ZEXT128$")>; + +// String instructions +def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; +def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>; + +// Execute +def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Select instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>; +def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>; +def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[FXb], (instregex "LZ(DR|ER)$")>; +def : InstRW<[FXb, FXb, Lat2, BeginGroup], (instregex "LZXR$")>; + +// Load +def : InstRW<[VecXsPm], (instregex "LER$")>; +def : InstRW<[FXb], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[FXb, Lat3], (instregex "LGDR$")>; +def : InstRW<[FXb, FXb, Lat2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)BR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "LTEBRCompare(_VecPseudo)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "LTDBRCompare(_VecPseudo)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], + (instregex "LTXBRCompare(_VecPseudo)?$")>; + +// Copy sign +def : InstRW<[VecXsPm], (instregex "CPSDRd(d|s)$")>; +def : InstRW<[VecXsPm], (instregex "CPSDRs(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm, LSU, Lat7], (instregex "LE(Y)?$")>; +def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSU], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// 
FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat7], (instregex "STD(Y)?$")>; +def : InstRW<[FXb, LSU, Lat7], (instregex "STE(Y)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "LEDBR(A)?$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LEXBR(A)?$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXBR(A)?$")>; + +// Load lengthened +def : InstRW<[VecBF, LSU, Lat12], (instregex "LDEB$")>; +def : InstRW<[VecBF], (instregex "LDEBR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12 , GroupAlone], (instregex "LX(D|E)B$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)BR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CEL(F|G)BR$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CDL(F|G)BR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)BR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)BR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[FXb, VecBF, Lat11, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLFDBR$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLG(E|D)BR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, 
BeginGroup], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DBR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)EBR$")>; +def : InstRW<[FXb], (instregex "LCDFR(_32)?$")>; +def : InstRW<[FXb], (instregex "LNDFR(_32)?$")>; +def : InstRW<[FXb], (instregex "LPDFR(_32)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)B$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)BR$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIEBR(A)?$")>; +def : InstRW<[VecBF], (instregex "FIDBR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>; +def : InstRW<[VecBF], (instregex "A(E|D)BR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>; +def : InstRW<[VecBF], (instregex "S(E|D)BR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|EE)B$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXDB$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : 
InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecXsPm, LSU, Lat8], (instregex "(K|C)(E|D)B$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(K|C)(E|D)BR?$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[FXa, Lat30], (instregex "SFASR$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>; +def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions 
+//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; +def : InstRW<[VecBF], (instregex "LEXR$")>; +def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXb], (instregex "LDER$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>; +def : InstRW<[VecBF], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[VecBF], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIER$")>; +def : InstRW<[VecBF], (instregex "FIDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>; +def : 
InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[VecBF], (instregex "C(E|D)R$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecDF], (instregex "LTDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecDF, Lat15], 
(instregex "LEDTR$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[VecDF], (instregex "LDETR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; + +// Perform 
floating-point operation +def : InstRW<[FXb, Lat30], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[VecDF], (instregex "FIDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[VecDF], (instregex "QADTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex 
"S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecDF], (instregex "(K|C)DTR$")>; +def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[VecDF], (instregex "CEDTR$")>; +def : InstRW<[VecDF], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "VLR(32|64)?$")>; +def : InstRW<[FXb, Lat4], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[FXb], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[FXb, Lat2], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VZERO$")>; +def : InstRW<[VecXsPm], (instregex "VONE$")>; +def : InstRW<[VecXsPm], (instregex "VGBM$")>; +def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads 
+//===----------------------------------------------------------------------===// + +def : InstRW<[LSU], (instregex "VL(L|BB)?$")>; +def : InstRW<[LSU], (instregex "VL(32|64)$")>; +def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, LSU, Lat7], (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VGE(F|G)$")>; +def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], + (instregex "VLM$")>; +def : InstRW<[LSU, Lat5], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat8], (instregex "VST(L|32|64)?$")>; +def : InstRW<[FXb, LSU, Lat8], (instregex "VSTE(F|G)$")>; +def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VSTE(B|H)$")>; +def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat20, GroupAlone], + (instregex "VSTM$")>; +def : InstRW<[FXb, FXb, LSU, Lat12, BeginGroup], (instregex "VSCE(F|G)$")>; +def : InstRW<[FXb, LSU, Lat8], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VPERM$")>; +def : InstRW<[VecXsPm], (instregex "VPDI$")>; +def : InstRW<[VecXsPm], (instregex "VBPERM$")>; +def : InstRW<[VecXsPm], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex 
"VPK(F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPL(B|F)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[VecXsPm], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[VecXsPm], (instregex "VO(C)?$")>; +def : InstRW<[VecMul], (instregex "VCKSM$")>; +def : InstRW<[VecXsPm], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VX$")>; +def : InstRW<[VecMul], (instregex "VGFM?$")>; +def : InstRW<[VecMul], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[VecMul], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[VecXsPm], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAL(B|F)?$")>; +def : InstRW<[VecMul], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[VecMul], (instregex "VMALO(B|F|H)?$")>; +def 
: InstRW<[VecMul], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VME(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VML(B|F)?$")>; +def : InstRW<[VecMul], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[VecMul], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[VecMul], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[VecBF2], (instregex "VMSL(G)?$")>; + +def : InstRW<[VecXsPm], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[VecXsPm], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>; +def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>; +def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>; +def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>; + +def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[VecXsPm], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[VecMul], (instregex "VSUM(B|H)?$")>; +def : InstRW<[VecMul], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[VecMul], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm, Lat4], (instregex 
"VEC(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCH(B|F|G|H)S$")>; +def : InstRW<[VecXsPm], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[VecStr, Lat5], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[VecBF], (instregex "VCD(L)?G$")>; +def : InstRW<[VecBF], (instregex "VCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "VC(L)?GD$")>; +def : InstRW<[VecBF], (instregex "VC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>; +def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>; +def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF], (instregex "VFL(L|R)$")>; +def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>; +def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>; +def : InstRW<[VecBF2], (instregex "WFLLD$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>; +def : InstRW<[VecBF2], (instregex "VFI$")>; +def : InstRW<[VecBF], (instregex "VFIDB$")>; +def : InstRW<[VecBF], (instregex "WFIDB$")>; +def : InstRW<[VecBF2], (instregex "VFISB$")>; +def : InstRW<[VecBF], (instregex "WFISB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[VecXsPm], (instregex "VFPSO$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; 
+def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; +def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>; +def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[VecBF2], (instregex "VFM$")>; +def : InstRW<[VecBF], (instregex "VFMDB$")>; +def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFMSB$")>; +def : InstRW<[VecBF], (instregex "WFMSB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>; +def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[VecFPd], (instregex "VFD$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>; +def : InstRW<[VecFPd], (instregex "WFDXB$")>; +def : InstRW<[VecFPd], (instregex "VFSQ$")>; +def : 
InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>; +def : InstRW<[VecFPd], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>; +def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "LEFR$")>; +def : InstRW<[FXb, Lat4], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecStr], (instregex "VFAE(B)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAEBS$")>; +def : InstRW<[VecStr], (instregex "VFAE(F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAE(F|H)S$")>; 
+def : InstRW<[VecStr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[VecStr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[VecStr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[VecDF, VecDF, Lat10, GroupAlone], (instregex "VLIP$")>; +def : InstRW<[VecDFX, LSU, Lat12, GroupAlone], (instregex "VPKZ$")>; +def : InstRW<[VecDFX, FXb, LSU, Lat12, GroupAlone], (instregex "VUPKZ$")>; +def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>; +def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>; +def : InstRW<[VecDFX], (instregex "V(A|S)P$")>; +def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[VecDFX, Lat30, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[VecDF, VecDF, Lat11], (instregex "VSRP$")>; +def : InstRW<[VecDFX], (instregex "VPSOP$")>; +def : InstRW<[VecDFX], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXb, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXb, Lat30], (instregex "IRBM$")>; +def : InstRW<[FXb, Lat30], (instregex "PFMF$")>; +def : InstRW<[FXb, Lat30], (instregex "TB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXb, LSU, Lat30], 
(instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXb, Lat30], (instregex "PR$")>; +def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXb, Lat20], (instregex "TAR$")>; + 
+//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>; +def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>; +def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], + (instregex "STCK(F)?$")>; +def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], + (instregex "STCKE$")>; +def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXb, Lat30], (instregex "PTF$")>; +def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "SVC$")>; +def : InstRW<[FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[FXb, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXb], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXb, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "LPP$")>; +def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXb, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXb, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXb, Lat30], (instregex "SAL$")>; + +} + diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index e3e1999d8ad8..4d986e8391cf 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -311,7 +311,7 @@ def : 
InstRW<[FXU], (instregex "ALGR(K)?$")>; def : InstRW<[FXU], (instregex "ALR(K)?$")>; def : InstRW<[FXU], (instregex "AR(K)?$")>; def : InstRW<[FXU], (instregex "A(L)?HHHR$")>; -def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "A(L)?HHLR$")>; def : InstRW<[FXU], (instregex "ALSIH(N)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>; @@ -337,7 +337,7 @@ def : InstRW<[FXU], (instregex "SLGR(K)?$")>; def : InstRW<[FXU], (instregex "SLR(K)?$")>; def : InstRW<[FXU], (instregex "SR(K)?$")>; def : InstRW<[FXU], (instregex "S(L)?HHHR$")>; -def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>; +def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "S(L)?HHLR$")>; // Subtraction with borrow def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>; @@ -403,13 +403,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>; def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>; def : InstRW<[FXU, Lat8], (instregex "MSGR$")>; def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>; -def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>; -def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXU, Lat5], (instregex "MGHI$")>; def : InstRW<[FXU, Lat5], (instregex "MHI$")>; def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>; -def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder @@ -436,7 +436,8 @@ def : InstRW<[FXU], (instregex "SLL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRL(G|K)?$")>; def : InstRW<[FXU], 
(instregex "SRA(G|K)?$")>; def : InstRW<[FXU, Lat2], (instregex "SLA(G|K)?$")>; -def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>; +def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -474,7 +475,7 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>; def : InstRW<[FXU], (instregex "CLR$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>; def : InstRW<[FXU], (instregex "C(L)?HHR$")>; -def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>; +def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "C(L)?HLR$")>; // Compare halfword def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>; @@ -499,7 +500,7 @@ def : InstRW<[FXU], (instregex "TMLH(64)?$")>; def : InstRW<[FXU], (instregex "TMLL(64)?$")>; // Compare logical characters under mask -def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>; +def : InstRW<[FXU, FXU, LSU, Lat5, GroupAlone], (instregex "CLM(H|Y)?$")>; //===----------------------------------------------------------------------===// // Prefetch @@ -532,7 +533,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone], (instregex "CDSG$")>; // Compare and swap and store -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>; // Perform locked operation def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; @@ -548,36 +549,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; // Translate and convert //===----------------------------------------------------------------------===// -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; +def : 
InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; //===----------------------------------------------------------------------===// // Message-security assist //===----------------------------------------------------------------------===// -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; +def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; -def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>; +def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>; +def : InstRW<[LSU, Lat12, GroupAlone], 
(instregex "UNPK(A|U)$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone], (instregex "(A|S|ZA)P$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone], +def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "(M|D)P$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone], (instregex "SRP$")>; -def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>; -def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>; +def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>; +def : InstRW<[DFU2, LSU, LSU, GroupAlone], (instregex "TP$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; //===----------------------------------------------------------------------===// @@ -621,7 +630,7 @@ def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>; //===----------------------------------------------------------------------===// // Find leftmost one -def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; +def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; // Population count def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; @@ -632,14 +641,14 @@ def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; -def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; // Various complex instructions -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXU, LSU, Lat30], 
(instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>; // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; @@ -780,9 +789,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>; // Multiply and add / subtract -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>; // Division @@ -791,7 +800,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>; // Divide to integer -def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; +def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>; //===----------------------------------------------------------------------===// // FP: Comparisons @@ -813,9 +822,9 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>; def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; -def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>; -def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; +def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[FXU, Lat30], (instregex "SFASR$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>; def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>; @@ -900,16 +909,20 @@ def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>; def : InstRW<[FPU2, FPU2, 
LSU, Lat15, GroupAlone], (instregex "MXD$")>; def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>; -def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>; -def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>; +def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>; +def : InstRW<[FPU, Lat10, GroupAlone], (instregex "MY(H|L)R$")>; // Multiply and add / subtract -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>; -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>; -def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; -def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>; +def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>; +def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>; // Division def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>; @@ -949,16 +962,21 @@ def : InstRW<[DFU, Lat20], (instregex "LDETR$")>; def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>; -def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex 
"CDGTR(A)?$")>; +def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>; def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>; -def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>; // Convert to fixed / logical -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>; -def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>; -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>; -def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>; @@ -967,7 +985,7 @@ def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>; def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>; // Perform floating-point operation -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; +def : InstRW<[FXU, Lat30], (instregex "PFPO$")>; //===----------------------------------------------------------------------===// // DFP: Unary arithmetic @@ -979,7 +997,7 @@ def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>; // Extract biased exponent def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>; -def : InstRW<[FXU, 
DFU, Lat15, GroupAlone], (instregex "EEXTR$")>; +def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>; // Extract significance def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>; @@ -1010,15 +1028,15 @@ def : InstRW<[DFU, Lat30], (instregex "QADTR$")>; def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>; // Reround -def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>; def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>; // Shift significand left/right -def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent -def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>; def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// @@ -1027,15 +1045,15 @@ def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; // Compare def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>; -def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>; +def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>; // Compare biased exponent def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>; -def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; +def : InstRW<[DFU2, Lat9], (instregex "CEXTR$")>; // Test Data Class/Group def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; -def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; +def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>; // -------------------------------- System ---------------------------------- // @@ -1046,19 +1064,20 @@ def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; def : 
InstRW<[FXU, Lat30], (instregex "EPSW$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>; -def : InstRW<[FXU, Lat3], (instregex "IPK$")>; -def : InstRW<[LSU], (instregex "SPKA$")>; -def : InstRW<[LSU], (instregex "SSM$")>; -def : InstRW<[FXU], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; def : InstRW<[FXU, Lat3], (instregex "IAC$")>; -def : InstRW<[LSU], (instregex "SAC(F)?$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; //===----------------------------------------------------------------------===// // System: Control Register Instructions //===----------------------------------------------------------------------===// def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>; -def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], + (instregex "STCT(L|G)$")>; def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>; def : InstRW<[FXU, Lat30], (instregex "ESEA$")>; @@ -1103,16 +1122,17 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>; //===----------------------------------------------------------------------===// def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; -def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCSK$")>; +def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>; -def : InstRW<[LSU, Lat30], (instregex "MVPG$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; //===----------------------------------------------------------------------===// // System: Address-Space Instructions 
//===----------------------------------------------------------------------===// def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>; -def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>; def : InstRW<[FXU, Lat30], (instregex "PR$")>; def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>; @@ -1124,7 +1144,7 @@ def : InstRW<[FXU, Lat20], (instregex "TAR$")>; // System: Linkage-Stack Instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>; def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>; def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>; @@ -1161,9 +1181,9 @@ def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat30], (instregex "SVC$")>; -def : InstRW<[FXU], (instregex "MC$")>; +def : InstRW<[FXU, GroupAlone], (instregex "MC$")>; def : InstRW<[FXU, Lat30], (instregex "DIAG$")>; -def : InstRW<[FXU], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TRAC(E|G)$")>; def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>; def : InstRW<[FXU, Lat30], (instregex "SIGP$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>; @@ -1176,7 +1196,8 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>; def : InstRW<[FXU], (instregex "LPP$")>; def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>; def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>; -def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>; def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td 
b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 59f37205f412..a0f2115eb9d7 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -69,7 +69,7 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 8; } def : WriteRes { let Latency = 9; } def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } def : WriteRes; // Virtual Branching Unit // -------------------------- INSTRUCTIONS ---------------------------------- // @@ -251,7 +251,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; // Load multiple disjoint -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "LMD$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; // Store multiple (estimated average of 3 ops) def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone], @@ -413,13 +413,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>; def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>; def : InstRW<[FXU, Lat8], (instregex "MSGR$")>; def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>; -def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>; -def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXU, Lat5], (instregex "MGHI$")>; def : InstRW<[FXU, Lat5], (instregex "MHI$")>; def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>; -def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder @@ -446,7 +446,8 @@ def : 
InstRW<[FXU], (instregex "SLL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRA(G|K)?$")>; def : InstRW<[FXU], (instregex "SLA(G|K)?$")>; -def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>; +def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -544,7 +545,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone], (instregex "CDSG$")>; // Compare and swap and store -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>; // Perform locked operation def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; @@ -560,36 +561,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; // Translate and convert //===----------------------------------------------------------------------===// -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXU, FXU, FXU, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; //===----------------------------------------------------------------------===// // Message-security assist //===----------------------------------------------------------------------===// -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; 
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; +def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; -def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>; +def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone], (instregex "(A|S|ZA)P$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone], +def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "(M|D)P$")>; -def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone], (instregex "SRP$")>; -def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>; -def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>; +def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, 
GroupAlone], (instregex "CP$")>; +def : InstRW<[DFU2, LSU, LSU, Lat5, GroupAlone], (instregex "TP$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; //===----------------------------------------------------------------------===// @@ -659,7 +668,7 @@ def : InstRW<[FXU], (instregex "PPA$")>; //===----------------------------------------------------------------------===// // Find leftmost one -def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; +def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; // Population count def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; @@ -670,14 +679,14 @@ def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; -def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; // Various complex instructions -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>; // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; @@ -818,9 +827,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>; // Multiply and add / subtract -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex 
"M(A|S)DB$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>; // Division @@ -829,7 +838,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>; // Divide to integer -def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; +def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>; //===----------------------------------------------------------------------===// // FP: Comparisons @@ -851,10 +860,10 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>; def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; -def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>; -def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>; -def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; -def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>; +def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[FXU, Lat30], (instregex "SFASR$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>; +def : InstRW<[FXU, GroupAlone], (instregex "SRNM(B|T)?$")>; // --------------------- Hexadecimal floating point ------------------------- // @@ -938,16 +947,20 @@ def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>; def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>; def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>; -def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>; -def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>; +def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>; +def : InstRW<[FPU, 
Lat10, GroupAlone], (instregex "MY(H|L)R$")>; // Multiply and add / subtract -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>; -def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>; -def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; -def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>; +def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>; +def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>; // Division def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>; @@ -987,16 +1000,21 @@ def : InstRW<[DFU, Lat20], (instregex "LDETR$")>; def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>; // Convert from fixed / logical -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>; -def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CDGTR(A)?$")>; +def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>; def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>; -def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>; // Convert to fixed / logical -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>; -def : 
InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>; -def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>; -def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>; // Convert from / to signed / unsigned packed def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>; @@ -1007,11 +1025,11 @@ def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$") // Convert from / to zoned def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>; def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>; -def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>; +def : InstRW<[FXU, LSU, DFU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>; def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>; // Perform floating-point operation -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; +def : InstRW<[FXU, Lat30], (instregex "PFPO$")>; //===----------------------------------------------------------------------===// // DFP: Unary arithmetic @@ -1023,7 +1041,7 @@ def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>; // Extract biased exponent def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>; -def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>; +def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>; // Extract significance def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>; @@ -1054,15 +1072,15 @@ def : InstRW<[DFU, 
Lat30], (instregex "QADTR$")>; def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>; // Reround -def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>; def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>; // Shift significand left/right -def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent -def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>; def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// @@ -1071,15 +1089,15 @@ def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; // Compare def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>; -def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>; +def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>; // Compare biased exponent def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>; -def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; +def : InstRW<[DFU, DFU, Lat9], (instregex "CEXTR$")>; // Test Data Class/Group def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; -def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; +def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>; // -------------------------------- System ---------------------------------- // @@ -1090,19 +1108,20 @@ def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>; -def : InstRW<[FXU, Lat3], (instregex "IPK$")>; -def : InstRW<[LSU], (instregex "SPKA$")>; -def : InstRW<[LSU], (instregex "SSM$")>; -def : InstRW<[FXU], 
(instregex "ST(N|O)SM$")>; +def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; def : InstRW<[FXU, Lat3], (instregex "IAC$")>; -def : InstRW<[LSU], (instregex "SAC(F)?$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; //===----------------------------------------------------------------------===// // System: Control Register Instructions //===----------------------------------------------------------------------===// def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>; -def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "STCT(L|G)$")>; def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>; def : InstRW<[FXU, Lat30], (instregex "ESEA$")>; @@ -1148,16 +1167,17 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>; //===----------------------------------------------------------------------===// def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; -def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[LSU, Lat6, Lat30, GroupAlone], (instregex "MVCSK$")>; +def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>; -def : InstRW<[LSU, Lat30], (instregex "MVPG$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; //===----------------------------------------------------------------------===// // System: Address-Space Instructions //===----------------------------------------------------------------------===// def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>; -def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>; def : InstRW<[FXU, Lat30], (instregex 
"PR$")>; def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>; @@ -1169,7 +1189,7 @@ def : InstRW<[FXU, Lat20], (instregex "TAR$")>; // System: Linkage-Stack Instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>; def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>; def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>; @@ -1206,7 +1226,7 @@ def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXU, Lat30], (instregex "SVC$")>; -def : InstRW<[FXU], (instregex "MC$")>; +def : InstRW<[FXU, GroupAlone], (instregex "MC$")>; def : InstRW<[FXU, Lat30], (instregex "DIAG$")>; def : InstRW<[FXU], (instregex "TRAC(E|G)$")>; def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>; @@ -1221,7 +1241,8 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>; def : InstRW<[FXU], (instregex "LPP$")>; def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>; def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>; -def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>; def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>; def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 7391df8342ef..13ceb371a425 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -200,14 +200,26 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; + case SystemZ::WFASB: + Changed |= shortenOn001AddCC(MI, SystemZ::AEBR); + break; + case SystemZ::WFDDB: Changed |= shortenOn001(MI, 
SystemZ::DDBR); break; + case SystemZ::WFDSB: + Changed |= shortenOn001(MI, SystemZ::DEBR); + break; + case SystemZ::WFIDB: Changed |= shortenFPConv(MI, SystemZ::FIDBRA); break; + case SystemZ::WFISB: + Changed |= shortenFPConv(MI, SystemZ::FIEBRA); + break; + case SystemZ::WLDEB: Changed |= shortenOn01(MI, SystemZ::LDEBR); break; @@ -220,30 +232,58 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001(MI, SystemZ::MDBR); break; + case SystemZ::WFMSB: + Changed |= shortenOn001(MI, SystemZ::MEEBR); + break; + case SystemZ::WFLCDB: Changed |= shortenOn01(MI, SystemZ::LCDFR); break; + case SystemZ::WFLCSB: + Changed |= shortenOn01(MI, SystemZ::LCDFR_32); + break; + case SystemZ::WFLNDB: Changed |= shortenOn01(MI, SystemZ::LNDFR); break; + case SystemZ::WFLNSB: + Changed |= shortenOn01(MI, SystemZ::LNDFR_32); + break; + case SystemZ::WFLPDB: Changed |= shortenOn01(MI, SystemZ::LPDFR); break; + case SystemZ::WFLPSB: + Changed |= shortenOn01(MI, SystemZ::LPDFR_32); + break; + case SystemZ::WFSQDB: Changed |= shortenOn01(MI, SystemZ::SQDBR); break; + case SystemZ::WFSQSB: + Changed |= shortenOn01(MI, SystemZ::SQEBR); + break; + case SystemZ::WFSDB: Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); break; + case SystemZ::WFSSB: + Changed |= shortenOn001AddCC(MI, SystemZ::SEBR); + break; + case SystemZ::WFCDB: Changed |= shortenOn01(MI, SystemZ::CDBR); break; + case SystemZ::WFCSB: + Changed |= shortenOn01(MI, SystemZ::CEBR); + break; + case SystemZ::VL32: // For z13 we prefer LDE over LE to avoid partial register dependencies. 
Changed |= shortenOn0(MI, SystemZ::LDE32); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index eb4a0962f7eb..9cd09b0f911e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -47,6 +47,10 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasVector(false), HasLoadStoreOnCond2(false), HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false), HasDFPPackedConversion(false), + HasMiscellaneousExtensions2(false), HasGuardedStorage(false), + HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false), + HasVectorEnhancements1(false), HasVectorPackedDecimal(false), + HasInsertReferenceBitsMultiple(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h index b05a1bb6cafd..4829f73e080e 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -56,6 +56,13 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { bool HasLoadAndZeroRightmostByte; bool HasMessageSecurityAssist5; bool HasDFPPackedConversion; + bool HasMiscellaneousExtensions2; + bool HasGuardedStorage; + bool HasMessageSecurityAssist7; + bool HasMessageSecurityAssist8; + bool HasVectorEnhancements1; + bool HasVectorPackedDecimal; + bool HasInsertReferenceBitsMultiple; private: Triple TargetTriple; @@ -168,6 +175,33 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { // Return true if the target has the vector facility. bool hasVector() const { return HasVector; } + // Return true if the target has the miscellaneous-extensions facility 2. 
+ bool hasMiscellaneousExtensions2() const { + return HasMiscellaneousExtensions2; + } + + // Return true if the target has the guarded-storage facility. + bool hasGuardedStorage() const { return HasGuardedStorage; } + + // Return true if the target has the message-security-assist + // extension facility 7. + bool hasMessageSecurityAssist7() const { return HasMessageSecurityAssist7; } + + // Return true if the target has the message-security-assist + // extension facility 8. + bool hasMessageSecurityAssist8() const { return HasMessageSecurityAssist8; } + + // Return true if the target has the vector-enhancements facility 1. + bool hasVectorEnhancements1() const { return HasVectorEnhancements1; } + + // Return true if the target has the vector-packed-decimal facility. + bool hasVectorPackedDecimal() const { return HasVectorPackedDecimal; } + + // Return true if the target has the insert-reference-bits-multiple facility. + bool hasInsertReferenceBitsMultiple() const { + return HasInsertReferenceBitsMultiple; + } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index cb81c0e5276e..025bf73d2df0 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -143,8 +143,10 @@ class SystemZPassConfig : public TargetPassConfig { } // end anonymous namespace void SystemZPassConfig::addIRPasses() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addPass(createSystemZTDCPass()); + addPass(createLoopDataPrefetchPass()); + } TargetPassConfig::addIRPasses(); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 9ac768b2189d..506dc7427993 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -372,6 +372,9 @@ int SystemZTTIImpl::getArithmeticInstrCost( Opcode == Instruction::FMul || Opcode == Instruction::FDiv) { switch (ScalarBits) { case 32: { + // The vector enhancements facility 1 provides v4f32 instructions. + if (ST->hasVectorEnhancements1()) + return NumVectors; // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 6923fc6fc910..a0c6fa94f8c1 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -56,6 +56,10 @@ class SystemZTTIImpl : public BasicTTIImplBase { unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector) const; + unsigned getCacheLineSize() { return 256; } + unsigned getPrefetchDistance() { return 2000; } + unsigned getMinPrefetchStride() { return 2048; } + bool prefersVectorizedAddressing() { return false; } bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index 19c93cfff0fe..91201d1fec85 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -83,6 +83,9 @@ FunctionPass *createX86WinEHStatePass(); /// the MachineInstr to MC. FunctionPass *createX86ExpandPseudoPass(); +/// This pass converts X86 cmov instructions into branch when profitable. +FunctionPass *createX86CmovConverterPass(); + /// Return a Machine IR pass that selectively replaces /// certain byte and word instructions by equivalent 32 bit instructions, /// in order to eliminate partial register usage, false dependences on diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 4ca57fe9fb00..54eabeac5126 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -814,10 +814,8 @@ def : Proc<"bdver4", [ FeatureMWAITX ]>; -// TODO: The scheduler model falls to BTVER2 model. -// The znver1 model has to be put in place. 
-// Zen -def: ProcessorModel<"znver1", BtVer2Model, [ +// Znver1 +def: ProcessorModel<"znver1", Znver1Model, [ FeatureADX, FeatureAES, FeatureAVX2, diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td index 6decb550ad5f..26461986427d 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.td +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td @@ -448,7 +448,7 @@ def RetCC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::Swift", CCDelegateTo>, // Handle explicit CC selection - CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, // Handle Vectorcall CC @@ -1004,7 +1004,7 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo>, CCIfCC<"CallingConv::AnyReg", CCDelegateTo>, - CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::HHVM", CCDelegateTo>, diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp new file mode 100644 index 000000000000..bfc834435de5 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -0,0 +1,611 @@ +//====-- X86CmovConversion.cpp - Convert Cmov to Branch -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a pass that converts X86 cmov instructions into branch +/// when profitable. This pass is conservative, i.e., it applies transformation +/// if and only if it can gaurantee a gain with high confidence. 
+/// +/// Thus, the optimization applies under the following conditions: +/// 1. Consider as a candidate only CMOV in most inner loop, assuming that +/// most hotspots are represented by these loops. +/// 2. Given a group of CMOV instructions, that are using same EFLAGS def +/// instruction: +/// a. Consider them as candidates only if all have same condition code or +/// opposite one, to prevent generating more than one conditional jump +/// per EFLAGS def instruction. +/// b. Consider them as candidates only if all are profitable to be +/// converted, assuming that one bad conversion may cause a degradation. +/// 3. Apply conversion only for loops that are found profitable and only for +/// CMOV candidates that were found profitable. +/// a. Loop is considered profitable only if conversion will reduce its +/// depth cost by some threshold. +/// b. CMOV is considered profitable if the cost of its condition is higher +/// than the average cost of its true-value and false-value by 25% of +/// branch-misprediction-penalty, this to assure no degradation even +/// with 25% branch misprediction. +/// +/// Note: This pass is assumed to run on SSA machine code. 
+//===----------------------------------------------------------------------===// +// +// External interfaces: +// FunctionPass *llvm::createX86CmovConverterPass(); +// bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF); +// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "x86-cmov-converter" + +STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups"); +STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates"); +STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops"); +STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups"); + +namespace { +// This internal switch can be used to turn off the cmov/branch optimization. +static cl::opt + EnableCmovConverter("x86-cmov-converter", + cl::desc("Enable the X86 cmov-to-branch optimization."), + cl::init(true), cl::Hidden); + +/// Converts X86 cmov instructions into branches when profitable. +class X86CmovConverterPass : public MachineFunctionPass { +public: + X86CmovConverterPass() : MachineFunctionPass(ID) {} + ~X86CmovConverterPass() {} + + StringRef getPassName() const override { return "X86 cmov Conversion"; } + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + /// Pass identification, replacement for typeid. + static char ID; + + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + TargetSchedModel TSchedModel; + + /// List of consecutive CMOV instructions. 
+ typedef SmallVector CmovGroup; + typedef SmallVector CmovGroups; + + /// Collect all CMOV-group-candidates in \p CurrLoop and update \p + /// CmovInstGroups accordingly. + /// + /// \param CurrLoop Loop being processed. + /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. + /// \returns true iff it found any CMOV-group-candidate. + bool collectCmovCandidates(MachineLoop *CurrLoop, CmovGroups &CmovInstGroups); + + /// Check if it is profitable to transform each CMOV-group-candidates into + /// branch. Remove all groups that are not profitable from \p CmovInstGroups. + /// + /// \param CurrLoop Loop being processed. + /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. + /// \returns true iff any CMOV-group-candidate remain. + bool checkForProfitableCmovCandidates(MachineLoop *CurrLoop, + CmovGroups &CmovInstGroups); + + /// Convert the given list of consecutive CMOV instructions into a branch. + /// + /// \param Group Consecutive CMOV instructions to be converted into branch. + void convertCmovInstsToBranches(SmallVectorImpl &Group) const; +}; + +char X86CmovConverterPass::ID = 0; + +void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); +} + +bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + if (!EnableCmovConverter) + return false; + + DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() + << "**********\n"); + + bool Changed = false; + MachineLoopInfo &MLI = getAnalysis(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + MRI = &MF.getRegInfo(); + TII = STI.getInstrInfo(); + TSchedModel.init(STI.getSchedModel(), &STI, TII); + + //===--------------------------------------------------------------------===// + // Algorithm + // --------- + // For each inner most loop + // collectCmovCandidates() { + // Find all CMOV-group-candidates. 
+ // } + // + // checkForProfitableCmovCandidates() { + // * Calculate both loop-depth and optimized-loop-depth. + // * Use these depth to check for loop transformation profitability. + // * Check for CMOV-group-candidate transformation profitability. + // } + // + // For each profitable CMOV-group-candidate + // convertCmovInstsToBranches() { + // * Create FalseBB, SinkBB, Conditional branch to SinkBB. + // * Replace each CMOV instruction with a PHI instruction in SinkBB. + // } + // + // Note: For more details, see each function description. + //===--------------------------------------------------------------------===// + for (MachineBasicBlock &MBB : MF) { + MachineLoop *CurrLoop = MLI.getLoopFor(&MBB); + + // Optimize only inner most loops. + if (!CurrLoop || CurrLoop->getHeader() != &MBB || + !CurrLoop->getSubLoops().empty()) + continue; + + // List of consecutive CMOV instructions to be processed. + CmovGroups CmovInstGroups; + + if (!collectCmovCandidates(CurrLoop, CmovInstGroups)) + continue; + + if (!checkForProfitableCmovCandidates(CurrLoop, CmovInstGroups)) + continue; + + Changed = true; + for (auto &Group : CmovInstGroups) + convertCmovInstsToBranches(Group); + } + return Changed; +} + +bool X86CmovConverterPass::collectCmovCandidates(MachineLoop *CurrLoop, + CmovGroups &CmovInstGroups) { + //===--------------------------------------------------------------------===// + // Collect all CMOV-group-candidates and add them into CmovInstGroups. + // + // CMOV-group: + // CMOV instructions, in same MBB, that uses same EFLAGS def instruction. + // + // CMOV-group-candidate: + // CMOV-group where all the CMOV instructions are + // 1. consecutive. + // 2. have same condition code or opposite one. + // 3. have only operand registers (X86::CMOVrr). 
+ //===--------------------------------------------------------------------===// + // List of possible improvement (TODO's): + // -------------------------------------- + // TODO: Add support for X86::CMOVrm instructions. + // TODO: Add support for X86::SETcc instructions. + // TODO: Add support for CMOV-groups with non consecutive CMOV instructions. + //===--------------------------------------------------------------------===// + + // Current processed CMOV-Group. + CmovGroup Group; + for (auto *MBB : CurrLoop->getBlocks()) { + Group.clear(); + // Condition code of first CMOV instruction current processed range and its + // opposite condition code. + X86::CondCode FirstCC, FirstOppCC; + // Indicator of a non CMOVrr instruction in the current processed range. + bool FoundNonCMOVInst = false; + // Indicator for current processed CMOV-group if it should be skipped. + bool SkipGroup = false; + + for (auto &I : *MBB) { + X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); + // Check if we found a X86::CMOVrr instruction. + if (CC != X86::COND_INVALID && !I.mayLoad()) { + if (Group.empty()) { + // We found first CMOV in the range, reset flags. + FirstCC = CC; + FirstOppCC = X86::GetOppositeBranchCondition(CC); + FoundNonCMOVInst = false; + SkipGroup = false; + } + Group.push_back(&I); + // Check if it is a non-consecutive CMOV instruction or it has different + // condition code than FirstCC or FirstOppCC. + if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC)) + // Mark the SKipGroup indicator to skip current processed CMOV-Group. + SkipGroup = true; + continue; + } + // If Group is empty, keep looking for first CMOV in the range. + if (Group.empty()) + continue; + + // We found a non X86::CMOVrr instruction. + FoundNonCMOVInst = true; + // Check if this instruction define EFLAGS, to determine end of processed + // range, as there would be no more instructions using current EFLAGS def. 
+ if (I.definesRegister(X86::EFLAGS)) { + // Check if current processed CMOV-group should not be skipped and add + // it as a CMOV-group-candidate. + if (!SkipGroup) + CmovInstGroups.push_back(Group); + else + ++NumOfSkippedCmovGroups; + Group.clear(); + } + } + // End of basic block is considered end of range, check if current processed + // CMOV-group should not be skipped and add it as a CMOV-group-candidate. + if (Group.empty()) + continue; + if (!SkipGroup) + CmovInstGroups.push_back(Group); + else + ++NumOfSkippedCmovGroups; + } + + NumOfCmovGroupCandidate += CmovInstGroups.size(); + return !CmovInstGroups.empty(); +} + +/// \returns Depth of CMOV instruction as if it was converted into branch. +/// \param TrueOpDepth depth cost of CMOV true value operand. +/// \param FalseOpDepth depth cost of CMOV false value operand. +static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) { + //===--------------------------------------------------------------------===// + // With no info about branch weight, we assume 50% for each value operand. + // Thus, depth of optimized CMOV instruction is the rounded up average of + // its True-Operand-Value-Depth and False-Operand-Value-Depth. + //===--------------------------------------------------------------------===// + return (TrueOpDepth + FalseOpDepth + 1) / 2; +} + +bool X86CmovConverterPass::checkForProfitableCmovCandidates( + MachineLoop *CurrLoop, CmovGroups &CmovInstGroups) { + struct DepthInfo { + /// Depth of original loop. + unsigned Depth; + /// Depth of optimized loop. + unsigned OptDepth; + }; + /// Number of loop iterations to calculate depth for ?! + static const unsigned LoopIterations = 2; + DenseMap DepthMap; + DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}}; + enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 }; + /// For each register type maps the register to its last def instruction. 
+ DenseMap RegDefMaps[RegTypeNum]; + /// Maps register operand to its def instruction, which can be nullptr if it + /// is unknown (e.g., operand is defined outside the loop). + DenseMap OperandToDefMap; + + // Set depth of unknown instruction (i.e., nullptr) to zero. + DepthMap[nullptr] = {0, 0}; + + SmallPtrSet CmovInstructions; + for (auto &Group : CmovInstGroups) + CmovInstructions.insert(Group.begin(), Group.end()); + + //===--------------------------------------------------------------------===// + // Step 1: Calculate instruction depth and loop depth. + // Optimized-Loop: + // loop with CMOV-group-candidates converted into branches. + // + // Instruction-Depth: + // instruction latency + max operand depth. + // * For CMOV instruction in optimized loop the depth is calculated as: + // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth) + // TODO: Find a better way to estimate the latency of the branch instruction + // rather than using the CMOV latency. + // + // Loop-Depth: + // max instruction depth of all instructions in the loop. + // Note: instruction with max depth represents the critical-path in the loop. + // + // Loop-Depth[i]: + // Loop-Depth calculated for first `i` iterations. + // Note: it is enough to calculate depth for up to two iterations. + // + // Depth-Diff[i]: + // Number of cycles saved in first 'i` iterations by optimizing the loop. + //===--------------------------------------------------------------------===// + for (unsigned I = 0; I < LoopIterations; ++I) { + DepthInfo &MaxDepth = LoopDepth[I]; + for (auto *MBB : CurrLoop->getBlocks()) { + // Clear physical registers Def map. + RegDefMaps[PhyRegType].clear(); + for (MachineInstr &MI : *MBB) { + unsigned MIDepth = 0; + unsigned MIDepthOpt = 0; + bool IsCMOV = CmovInstructions.count(&MI); + for (auto &MO : MI.uses()) { + // Checks for "isUse()" as "uses()" returns also implicit definitions. 
+ if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; + if (MachineInstr *DefMI = RDM.lookup(Reg)) { + OperandToDefMap[&MO] = DefMI; + DepthInfo Info = DepthMap.lookup(DefMI); + MIDepth = std::max(MIDepth, Info.Depth); + if (!IsCMOV) + MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth); + } + } + + if (IsCMOV) + MIDepthOpt = getDepthOfOptCmov( + DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth, + DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth); + + // Iterates over all operands to handle implicit definitions as well. + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; + } + + unsigned Latency = TSchedModel.computeInstrLatency(&MI); + DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency}; + MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth); + MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt); + } + } + } + + unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, + LoopDepth[1].Depth - LoopDepth[1].OptDepth}; + + //===--------------------------------------------------------------------===// + // Step 2: Check if Loop worth to be optimized. + // Worth-Optimize-Loop: + // case 1: Diff[1] == Diff[0] + // Critical-path is iteration independent - there is no dependency + // of critical-path instructions on critical-path instructions of + // previous iteration. + // Thus, it is enough to check gain percent of 1st iteration - + // To be conservative, the optimized loop need to have a depth of + // 12.5% cycles less than original loop, per iteration. + // + // case 2: Diff[1] > Diff[0] + // Critical-path is iteration dependent - there is dependency of + // critical-path instructions on critical-path instructions of + // previous iteration. 
+ // Thus, it is required to check the gradient of the gain - the + // change in Depth-Diff compared to the change in Loop-Depth between + // 1st and 2nd iterations. + // To be conservative, the gradient need to be at least 50%. + // + // If loop is not worth optimizing, remove all CMOV-group-candidates. + //===--------------------------------------------------------------------===// + bool WorthOptLoop = false; + if (Diff[1] == Diff[0]) + WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth; + else if (Diff[1] > Diff[0]) + WorthOptLoop = + (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth); + + if (!WorthOptLoop) + return false; + + ++NumOfLoopCandidate; + + //===--------------------------------------------------------------------===// + // Step 3: Check for each CMOV-group-candidate if it worth to be optimized. + // Worth-Optimize-Group: + // Iff it worths to optimize all CMOV instructions in the group. + // + // Worth-Optimize-CMOV: + // Predicted branch is faster than CMOV by the difference between depth of + // condition operand and depth of taken (predicted) value operand. + // To be conservative, the gain of such CMOV transformation should cover at + // at least 25% of branch-misprediction-penalty. + //===--------------------------------------------------------------------===// + unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; + CmovGroups TempGroups; + std::swap(TempGroups, CmovInstGroups); + for (auto &Group : TempGroups) { + bool WorthOpGroup = true; + for (auto *MI : Group) { + // Avoid CMOV instruction which value is used as a pointer to load from. + // This is another conservative check to avoid converting CMOV instruction + // used with tree-search like algorithm, where the branch is unpredicted. 
+ auto UIs = MRI->use_instructions(MI->defs().begin()->getReg()); + if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) { + unsigned Op = UIs.begin()->getOpcode(); + if (Op == X86::MOV64rm || Op == X86::MOV32rm) { + WorthOpGroup = false; + break; + } + } + + unsigned CondCost = + DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth; + unsigned ValCost = getDepthOfOptCmov( + DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, + DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); + if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { + WorthOpGroup = false; + break; + } + } + + if (WorthOpGroup) + CmovInstGroups.push_back(Group); + } + + return !CmovInstGroups.empty(); +} + +static bool checkEFLAGSLive(MachineInstr *MI) { + if (MI->killsRegister(X86::EFLAGS)) + return false; + + // The EFLAGS operand of MI might be missing a kill marker. + // Figure out whether EFLAGS operand should LIVE after MI instruction. + MachineBasicBlock *BB = MI->getParent(); + MachineBasicBlock::iterator ItrMI = MI; + + // Scan forward through BB for a use/def of EFLAGS. + for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) { + if (I->readsRegister(X86::EFLAGS)) + return true; + if (I->definesRegister(X86::EFLAGS)) + return false; + } + + // We hit the end of the block, check whether EFLAGS is live into a successor. + for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) { + if ((*I)->isLiveIn(X86::EFLAGS)) + return true; + } + + return false; +} + +void X86CmovConverterPass::convertCmovInstsToBranches( + SmallVectorImpl &Group) const { + assert(!Group.empty() && "No CMOV instructions to convert"); + ++NumOfOptimizedCmovGroups; + + // To convert a CMOVcc instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. 
+ + // Before + // ----- + // MBB: + // cond = cmp ... + // v1 = CMOVge t1, f1, cond + // v2 = CMOVlt t2, f2, cond + // v3 = CMOVge v1, f3, cond + // + // After + // ----- + // MBB: + // cond = cmp ... + // jge %SinkMBB + // + // FalseMBB: + // jmp %SinkMBB + // + // SinkMBB: + // %v1 = phi[%f1, %FalseMBB], [%t1, %MBB] + // %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch + // ; true-value with false-value + // %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use + // ; previous Phi instruction result + + MachineInstr &MI = *Group.front(); + MachineInstr *LastCMOV = Group.back(); + DebugLoc DL = MI.getDebugLoc(); + X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode())); + X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction::iterator It = ++MBB->getIterator(); + MachineFunction *F = MBB->getParent(); + const BasicBlock *BB = MBB->getBasicBlock(); + + MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB); + MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB); + F->insert(It, FalseMBB); + F->insert(It, SinkMBB); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + if (checkEFLAGSLive(LastCMOV)) { + FalseMBB->addLiveIn(X86::EFLAGS); + SinkMBB->addLiveIn(X86::EFLAGS); + } + + // Transfer the remainder of BB and its successor edges to SinkMBB. + SinkMBB->splice(SinkMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end()); + SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Add the false and sink blocks as its successors. + MBB->addSuccessor(FalseMBB); + MBB->addSuccessor(SinkMBB); + + // Create the conditional branch instruction. + BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB); + + // Add the sink block to the false block successors. 
+ FalseMBB->addSuccessor(SinkMBB); + + MachineInstrBuilder MIB; + MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator MIItEnd = + std::next(MachineBasicBlock::iterator(LastCMOV)); + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later CMOVs may reference the results of earlier CMOVs, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. + DenseMap> RegRewriteTable; + + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + unsigned DestReg = MIIt->getOperand(0).getReg(); + unsigned Op1Reg = MIIt->getOperand(1).getReg(); + unsigned Op2Reg = MIIt->getOperand(2).getReg(); + + // If this CMOV we are processing is the opposite condition from the jump we + // generated, then we have to swap the operands for the PHI that is going to + // be generated. + if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC) + std::swap(Op1Reg, Op2Reg); + + auto Op1Itr = RegRewriteTable.find(Op1Reg); + if (Op1Itr != RegRewriteTable.end()) + Op1Reg = Op1Itr->second.first; + + auto Op2Itr = RegRewriteTable.find(Op2Reg); + if (Op2Itr != RegRewriteTable.end()) + Op2Reg = Op2Itr->second.second; + + // SinkMBB: + // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ] + // ... + MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) + .addReg(Op1Reg) + .addMBB(FalseMBB) + .addReg(Op2Reg) + .addMBB(MBB); + (void)MIB; + DEBUG(dbgs() << "\tFrom: "; MIIt->dump()); + DEBUG(dbgs() << "\tTo: "; MIB->dump()); + + // Add this PHI to the rewrite table. 
+ RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); + } + + // Now remove the CMOV(s). + MBB->erase(MIItBegin, MIItEnd); +} + +} // End anonymous namespace. + +FunctionPass *llvm::createX86CmovConverterPass() { + return new X86CmovConverterPass(); +} diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index ee9e78146305..527e5d568ac6 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1187,7 +1187,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && CC != CallingConv::X86_64_SysV && - CC != CallingConv::X86_64_Win64) + CC != CallingConv::Win64) return false; // Don't handle popping bytes if they don't fit the ret's immediate. @@ -3171,7 +3171,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: - case CallingConv::X86_64_Win64: + case CallingConv::Win64: case CallingConv::X86_64_SysV: break; } diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp index c28746f96439..95c6f2a3fa34 100644 --- a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -22,7 +22,7 @@ /// instructions and register-to-register moves. It would /// seem like cmov(s) would also be affected, but because of the way cmov is /// really implemented by most machines as reading both the destination and -/// and source regsters, and then "merging" the two based on a condition, +/// and source registers, and then "merging" the two based on a condition, /// it really already should be considered as having a true dependence on the /// destination register as well. 
/// diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 65486cf7f529..44eecd664714 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1335,6 +1335,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ, VT, Custom); } + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64, + MVT::v8i64}) { + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + } + // Need to promote to 64-bit even though we have 32-bit masked instructions // because the IR optimizers rearrange bitcasts around logic ops leaving // too many variations to handle if we don't promote them. @@ -1663,10 +1670,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; - // TODO: These control memcmp expansion in CGP and are set low to prevent - // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder. - MaxLoadsPerMemcmp = 1; - MaxLoadsPerMemcmpOptSize = 1; + // TODO: These control memcmp expansion in CGP and could be raised higher, but + // that needs to benchmarked and balanced with the potential use of vector + // load/store types (PR33329). + MaxLoadsPerMemcmp = 4; + MaxLoadsPerMemcmpOptSize = 2; // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). 
setPrefLoopAlignment(ExperimentalPrefLoopAlignment); @@ -2661,7 +2669,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { // C calling conventions: case CallingConv::C: - case CallingConv::X86_64_Win64: + case CallingConv::Win64: case CallingConv::X86_64_SysV: // Callee pop conventions: case CallingConv::X86_ThisCall: @@ -20188,7 +20196,10 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); EVT MaskVT = Mask.getValueType(); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); @@ -20210,7 +20221,10 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); @@ -20235,7 +20249,10 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. 
+ if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -20254,7 +20271,10 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); - auto *C = cast(ScaleOp); + auto *C = dyn_cast(ScaleOp); + // Scale must be constant. + if (!C) + return SDValue(); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -22665,10 +22685,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, SDLoc DL(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); + unsigned Opcode = Op.getOpcode(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + if (Subtarget.hasAVX512()) { + // Attempt to rotate by immediate. + APInt UndefElts; + SmallVector EltBits; + if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) { + if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) { + return EltBits[0] == V; + })) { + unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); + uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits); + return DAG.getNode(Op, DL, VT, R, + DAG.getConstant(RotateAmt, DL, MVT::i8)); + } + } + + // Else, fall-back on VPROLV/VPRORV. + return Op; + } assert(VT.isVector() && "Custom lowering only for vector rotates!"); assert(Subtarget.hasXOP() && "XOP support required for vector rotates!"); - assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported"); + assert((Opcode == ISD::ROTL) && "Only ROTL supported"); // XOP has 128-bit vector variable + immediate rotates. // +ve/-ve Amt = rotate left/right. 
@@ -22683,7 +22724,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (auto *BVAmt = dyn_cast(Amt)) { if (auto *RotateConst = BVAmt->getConstantSplatNode()) { uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue(); - assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range"); + assert(RotateAmt < EltSizeInBits && "Rotation out of range"); return DAG.getNode(X86ISD::VPROTI, DL, VT, R, DAG.getConstant(RotateAmt, DL, MVT::i8)); } @@ -24030,7 +24071,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG); case ISD::UMUL_LOHI: case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG); - case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG); + case ISD::ROTL: + case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, Subtarget, DAG); diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index cc5c09cbf0e5..705d0f7a5cf7 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1759,29 +1759,29 @@ let Predicates = Preds in { (i64 0)), (COPY_TO_REGCLASS (!cast(InstrStr##rr) _.RC:$src1, _.RC:$src2), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), + (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))))), (i64 0)), (COPY_TO_REGCLASS (!cast(InstrStr##rm) _.RC:$src1, addr:$src2), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and _.KRCWM:$mask, + (_.KVT (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), (i64 0)), (COPY_TO_REGCLASS (!cast(InstrStr##rrk) _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), 
- (_.VT (bitconvert + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))))))), (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmk) _.KRCWM:$mask, + (COPY_TO_REGCLASS (!cast(InstrStr##rmk) _.KRCWM:$mask, _.RC:$src1, addr:$src2), NewInf.KRC)>; } @@ -1798,7 +1798,7 @@ let Predicates = Preds in { (i64 0)), (COPY_TO_REGCLASS (!cast(InstrStr##rmb) _.RC:$src1, addr:$src2), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), (_.KVT (and (_.KVT _.KRCWM:$mask), (_.KVT (OpNode (_.VT _.RC:$src1), @@ -1879,7 +1879,7 @@ defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; defm : avx512_icmp_packed_rmb_lowering; @@ -2127,17 +2127,17 @@ multiclass avx512_icmp_cc_packed_lowering Preds> { let Predicates = Preds in { def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT _.RC:$src2), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), imm:$cc)), (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, + (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, _.RC:$src2, imm:$cc), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), + (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)), (i64 0)), @@ -2145,37 +2145,37 @@ let Predicates = Preds in { addr:$src2, imm:$cc), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and _.KRCWM:$mask, + (_.KVT (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc))), (i64 0)), (COPY_TO_REGCLASS (!cast(InstrStr##rrik) _.KRCWM:$mask, - _.RC:$src1, + _.RC:$src1, _.RC:$src2, imm:$cc), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag 
addr:$src2))), imm:$cc)))), (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmik) _.KRCWM:$mask, + (COPY_TO_REGCLASS (!cast(InstrStr##rmik) _.KRCWM:$mask, _.RC:$src1, addr:$src2, imm:$cc), NewInf.KRC)>; } } - + multiclass avx512_icmp_cc_packed_rmb_lowering Preds> + list Preds> : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { let Predicates = Preds in { def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), @@ -2187,7 +2187,7 @@ let Predicates = Preds in { addr:$src2, imm:$cc), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), (_.KVT (and (_.KVT _.KRCWM:$mask), (_.KVT (OpNode (_.VT _.RC:$src1), @@ -2447,17 +2447,17 @@ multiclass avx512_fcmp_cc_packed_lowering Preds> { let Predicates = Preds in { def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpm (_.VT _.RC:$src1), - (_.VT _.RC:$src2), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), imm:$cc)), (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, + (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, _.RC:$src2, imm:$cc), NewInf.KRC)>; - + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.KVT (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)), (i64 0)), @@ -2477,19 +2477,19 @@ let Predicates = Preds in { NewInf.KRC)>; } } - + multiclass avx512_fcmp_cc_packed_sae_lowering Preds> + string InstrStr, list Preds> : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> { let Predicates = Preds in def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), - (_.VT _.RC:$src2), + (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), imm:$cc, (i32 FROUND_NO_EXC))), (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rrib) _.RC:$src1, + (COPY_TO_REGCLASS (!cast(InstrStr##rrib) _.RC:$src1, _.RC:$src2, imm:$cc), NewInf.KRC)>; @@ -2817,16 +2817,16 @@ let Predicates = [HasAVX512] in { def : Pat<(maskVT (scalar_to_vector GR32:$src)), 
(COPY_TO_REGCLASS GR32:$src, maskRC)>; - def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))), + def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))), (COPY_TO_REGCLASS maskRC:$src, GR32)>; def : Pat<(maskVT (scalar_to_vector GR8:$src)), (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; - def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))), + def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; - def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))), + def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))), (COPY_TO_REGCLASS maskRC:$src, GR32)>; } @@ -3036,7 +3036,7 @@ def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -def : Pat<(insert_subvector (v16i1 immAllZerosV), +def : Pat<(insert_subvector (v16i1 immAllZerosV), (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (i64 0)), (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrr) @@ -3044,8 +3044,8 @@ def : Pat<(insert_subvector (v16i1 immAllZerosV), (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), (i8 8)), (i8 8))>; -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (and VK8:$mask, +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), (i64 0)), (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrk) @@ -3063,7 +3063,7 @@ def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2) (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; -def : Pat<(insert_subvector (v16i1 immAllZerosV), +def : Pat<(insert_subvector (v16i1 immAllZerosV), (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), (i64 0)), (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrri) @@ -3072,8 +3072,8 @@ def 
: Pat<(insert_subvector (v16i1 immAllZerosV), imm:$cc), (i8 8)), (i8 8))>; -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (and VK8:$mask, +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))), (i64 0)), (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrik) @@ -3379,35 +3379,35 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, "VMOVDQA32">, + HasAVX512, "VMOVDQA32">, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, "VMOVDQA64">, + HasAVX512, "VMOVDQA64">, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI, "VMOVDQU8">, + HasBWI, "VMOVDQU8">, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI, "VMOVDQU16">, + HasBWI, "VMOVDQU16">, XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512, "VMOVDQU32">, + HasAVX512, "VMOVDQU32">, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512, "VMOVDQU64">, + HasAVX512, "VMOVDQU64">, XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. 
We need @@ -3964,49 +3964,49 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; let hasSideEffects = 0 in { - def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], NoItinerary>, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrr">; let Constraints = "$src0 = $dst" in - def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, + def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrk">; - - def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + + def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrkz">; - def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrr">; let Constraints = "$src0 = $dst" in - def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, + def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, VR128X:$src1, FR64X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, - VEX_W, 
FoldGenData<"VMOVSDZrrk">; + VEX_W, FoldGenData<"VMOVSDZrrk">; - def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins f64x_info.KRCWM:$mask, VR128X:$src1, + def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.KRCWM:$mask, VR128X:$src1, FR64X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrrkz">; } @@ -5676,6 +5676,109 @@ defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; + +// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. +let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG 
(v8i64 + (VPROLQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPROLQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; + + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPROLDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; +} + +// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. +let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORVQZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))), + sub_xmm)>; + def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORVDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + sub_ymm)>; + + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 
VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v8i64 + (VPRORQZri + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; + + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), + imm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + (EXTRACT_SUBREG (v16i32 + (VPRORDZri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + imm:$src2)), sub_ymm)>; +} + //===-------------------------------------------------------------------===// // 1-src variable permutation VPERMW/D/Q //===-------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 7e4cba1c8345..343da2573b55 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -224,7 +224,7 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, const TargetRegisterClass * X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) + if (IsWin64 || (F && F->getCallingConv() == CallingConv::Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; @@ -334,7 +334,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Is64Bit) return CSR_64_MostRegs_SaveList; break; - case CallingConv::X86_64_Win64: + case CallingConv::Win64: if (!HasSSE) return CSR_Win64_NoSSE_SaveList; return CSR_Win64_SaveList; @@ -450,7 +450,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (Is64Bit) return CSR_64_MostRegs_RegMask; break; - case 
CallingConv::X86_64_Win64: + case CallingConv::Win64: return CSR_Win64_RegMask; case CallingConv::X86_64_SysV: return CSR_64_RegMask; diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td index a12fa68faf4f..d831a7974359 100644 --- a/contrib/llvm/lib/Target/X86/X86Schedule.td +++ b/contrib/llvm/lib/Target/X86/X86Schedule.td @@ -663,5 +663,6 @@ include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" include "X86ScheduleSLM.td" +include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td index ed53893b779c..9dcc968a1a7a 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -370,6 +370,22 @@ def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; +//////////////////////////////////////////////////////////////////////////////// +// SSE4A instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteEXTRQ: SchedWriteRes<[JFPU01]> { + let Latency = 1; + let ResourceCycles = [1]; +} +def : InstRW<[WriteEXTRQ], (instregex "EXTRQ")>; + +def WriteINSERTQ: SchedWriteRes<[JFPU01]> { + let Latency = 2; + let ResourceCycles = [4]; +} +def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; + //////////////////////////////////////////////////////////////////////////////// // AVX instructions. 
//////////////////////////////////////////////////////////////////////////////// diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td new file mode 100644 index 000000000000..d5b4cfe2ddee --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -0,0 +1,223 @@ +//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Znver1 to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def Znver1Model : SchedMachineModel { + // Zen can decode 4 instructions per cycle. + let IssueWidth = 4; + // Based on the reorder buffer we define MicroOpBufferSize + let MicroOpBufferSize = 192; + let LoadLatency = 4; + let MispredictPenalty = 17; + let HighLatency = 25; + let PostRAScheduler = 1; + + // FIXME: This variable is required for incomplete model. + // We haven't catered all instructions. + // So, we reset the value of this variable so as to + // say that the model is incomplete. + let CompleteModel = 0; +} + +let SchedModel = Znver1Model in { + +// Zen can issue micro-ops to 10 different units in one cycle. +// These are +// * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3) +// * Two AGU units (ZAGU0, ZAGU1) +// * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3) +// AGUs feed load store queues @two loads and 1 store per cycle. 
+ +// Four ALU units are defined below +def ZnALU0 : ProcResource<1>; +def ZnALU1 : ProcResource<1>; +def ZnALU2 : ProcResource<1>; +def ZnALU3 : ProcResource<1>; + +// Two AGU units are defined below +def ZnAGU0 : ProcResource<1>; +def ZnAGU1 : ProcResource<1>; + +// Four FPU units are defined below +def ZnFPU0 : ProcResource<1>; +def ZnFPU1 : ProcResource<1>; +def ZnFPU2 : ProcResource<1>; +def ZnFPU3 : ProcResource<1>; + +// FPU grouping +def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>; +def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>; +def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>; +def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>; +def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>; +def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>; +def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>; +def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>; + +// Below are the grouping of the units. +// Micro-ops to be issued to multiple units are tackled this way. + +// ALU grouping +// ZnALU03 - 0,3 grouping +def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>; + +// 56 Entry (14x4 entries) Int Scheduler +def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> { + let BufferSize=56; +} + +// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations +// but are relevant for some instructions +def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> { + let BufferSize=28; +} + +// Integer Multiplication issued on ALU1. +def ZnMultiplier : ProcResource<1>; + +// Integer division issued on ALU2. +def ZnDivider : ProcResource<1>; + +// 4 Cycles load-to use Latency is captured +def : ReadAdvance; + +// (a folded load is an instruction that loads and does some operation) +// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops. +// a. load and +// b. addpd +// This multiclass is for folded loads for integer units. +multiclass ZnWriteResPair { + // Register variant takes 1-cycle on Execution Port. 
+ def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on ZnAGU + // adds 4 cycles to the latency. + def : WriteRes { + let Latency = !add(Lat, 4); + } +} + +// This multiclass is for folded loads for floating point units. +multiclass ZnWriteResFpuPair { + // Register variant takes 1-cycle on Execution Port. + def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on ZnAGU + // adds 7 cycles to the latency. + def : WriteRes { + let Latency = !add(Lat, 7); + } +} + +// WriteRMW is set for instructions with Memory write +// operation in codegen +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 8; } + +def : WriteRes; +def : WriteRes; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; + +// IDIV +def : WriteRes { + let Latency = 41; + let ResourceCycles = [1, 41]; +} + +def : WriteRes { + let Latency = 45; + let ResourceCycles = [1, 4, 41]; +} + +// IMUL +def : WriteRes{ + let Latency = 4; +} +def : WriteRes { + let Latency = 4; +} + +def : WriteRes { + let Latency = 8; +} + +// Floating point operations +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +// Vector integer operations which uses FPU units +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +// Vector Shift Operations +defm : ZnWriteResFpuPair; + +// AES Instructions. 
+defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; + +def : WriteRes; +def : WriteRes; + +// Following instructions with latency=100 are microcoded. +// We set long latency so as to block the entire pipeline. +defm : ZnWriteResFpuPair; + +//Microcoded Instructions +let Latency = 100 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + } +} diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index fa0afe29586b..427a0001bef9 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -597,7 +597,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { case CallingConv::Intel_OCL_BI: return isTargetWin64(); // This convention allows using the Win64 convention on other targets. - case CallingConv::X86_64_Win64: + case CallingConv::Win64: return true; // This convention allows using the SysV convention on Windows targets. 
case CallingConv::X86_64_SysV: diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 8d891c983fab..08c2cdaefe71 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -375,6 +375,7 @@ bool X86PassConfig::addILPOpts() { addPass(&EarlyIfConverterID); if (EnableMachineCombinerPass) addPass(&MachineCombinerID); + addPass(createX86CmovConverterPass()); return true; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h index aaa6d58bd134..c16207973b39 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,8 @@ class X86TargetMachine final : public LLVMTargetMachine { ~X86TargetMachine() override; const X86Subtarget *getSubtargetImpl(const Function &F) const override; + // The no argument getSubtargetImpl, while it exists on some targets, is + // deprecated and should not be used. const X86Subtarget *getSubtargetImpl() const = delete; TargetIRAnalysis getTargetIRAnalysis() override; diff --git a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp new file mode 100644 index 000000000000..a7de79306074 --- /dev/null +++ b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -0,0 +1,160 @@ +//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines an interface to a dlltool.exe-compatible driver. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Path.h" + +#include +#include + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::COFF; + +namespace { + +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ + {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \ + X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#include "Options.inc" +#undef OPTION +}; + +class DllOptTable : public llvm::opt::OptTable { +public: + DllOptTable() : OptTable(infoTable, false) {} +}; + +} // namespace + +std::vector> OwningMBs; + +// Opens a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. 
+MemoryBufferRef openFile(StringRef Path) { + ErrorOr> MB = MemoryBuffer::getFile(Path); + + if (std::error_code EC = MB.getError()) + llvm::errs() << "fail openFile: " << EC.message() << "\n"; + + MemoryBufferRef MBRef = MB.get()->getMemBufferRef(); + OwningMBs.push_back(std::move(MB.get())); // take ownership + return MBRef; +} + +static MachineTypes getEmulation(StringRef S) { + return StringSwitch(S) + .Case("i386", IMAGE_FILE_MACHINE_I386) + .Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64) + .Case("arm", IMAGE_FILE_MACHINE_ARMNT) + .Default(IMAGE_FILE_MACHINE_UNKNOWN); +} + +static std::string getImplibPath(std::string Path) { + SmallString<128> Out = StringRef("lib"); + Out.append(Path); + sys::path::replace_extension(Out, ".a"); + return Out.str(); +} + +int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { + DllOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + llvm::opt::InputArgList Args = + Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount); + if (MissingCount) { + llvm::errs() << Args.getArgString(MissingIndex) << ": missing argument\n"; + return 1; + } + + // Handle when no input or output is specified + if (Args.hasArgNoClaim(OPT_INPUT) || + (!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) { + Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false); + llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm\n"; + return 1; + } + + if (!Args.hasArgNoClaim(OPT_m) && Args.hasArgNoClaim(OPT_d)) { + llvm::errs() << "error: no target machine specified\n" + << "supported targets: i386, i386:x86-64, arm\n"; + return 1; + } + + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + + MemoryBufferRef MB; + if (auto *Arg = Args.getLastArg(OPT_d)) + MB = openFile(Arg->getValue()); + + if (!MB.getBufferSize()) { + llvm::errs() << "definition file empty\n"; + return 1; + } + + COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; + if (auto *Arg = 
Args.getLastArg(OPT_m)) + Machine = getEmulation(Arg->getValue()); + + if (Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + llvm::errs() << "unknown target\n"; + return 1; + } + + Expected Def = + parseCOFFModuleDefinition(MB, Machine, true); + + if (!Def) { + llvm::errs() << "error parsing definition\n" + << errorToErrorCode(Def.takeError()).message(); + return 1; + } + + // Do this after the parser because parseCOFFModuleDefinition sets OutputFile. + if (auto *Arg = Args.getLastArg(OPT_D)) + Def->OutputFile = Arg->getValue(); + + if (Def->OutputFile.empty()) { + llvm::errs() << "no output file specified\n"; + return 1; + } + + std::string Path = Args.getLastArgValue(OPT_l); + if (Path.empty()) + Path = getImplibPath(Def->OutputFile); + + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine)) + return 1; + return 0; +} diff --git a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td new file mode 100644 index 000000000000..213c6a4d7674 --- /dev/null +++ b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td @@ -0,0 +1,26 @@ +include "llvm/Option/OptParser.td" + +def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target machine">; +def m_long : JoinedOrSeparate<["--"], "machine">, Alias; + +def l: JoinedOrSeparate<["-"], "l">, HelpText<"Generate an import lib">; +def l_long : JoinedOrSeparate<["--"], "output-lib">, Alias; + +def D: JoinedOrSeparate<["-"], "D">, HelpText<"Specify the input DLL Name">; +def D_long : JoinedOrSeparate<["--"], "dllname">, Alias; + +def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">; +def d_long : JoinedOrSeparate<["--"], "input-def">, Alias; + +//============================================================================== +// The flags below do nothing. They are defined only for dlltool compatibility. 
+//============================================================================== + +def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">; +def k_alias: Flag<["--"], "kill-at">, Alias; + +def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">; +def S_alias: JoinedOrSeparate<["--"], "as">, Alias; + +def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">; +def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias; diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 3d57acf06e74..93eab680ca6b 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2026,6 +2026,24 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, continue; } + // LLVM's definition of dominance allows instructions that are cyclic + // in unreachable blocks, e.g.: + // %pat = select i1 %condition, @global, i16* %pat + // because any instruction dominates an instruction in a block that's + // not reachable from entry. + // So, remove unreachable blocks from the function, because a) there's + // no point in analyzing them and b) GlobalOpt should otherwise grow + // some more complicated logic to break these cycles. + // Removing unreachable blocks might invalidate the dominator so we + // recalculate it. 
+ if (!F->isDeclaration()) { + if (removeUnreachableBlocks(*F)) { + auto &DT = LookupDomTree(*F); + DT.recalculate(*F); + Changed = true; + } + } + Changed |= processGlobal(*F, TLI, LookupDomTree); if (!F->hasLocalLinkage()) diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index 00ddb93df830..317770d133b3 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -909,7 +909,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). Callee.removeDeadConstantUsers(); - if (Callee.use_empty()) { + if (Callee.use_empty() && !CG.isLibFunction(Callee)) { Calls.erase( std::remove_if(Calls.begin() + i + 1, Calls.end(), [&Callee](const std::pair &Call) { diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp index ac4765f96075..6baada2c1ae1 100644 --- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -173,8 +173,10 @@ class SampleProfileLoader { void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); bool computeBlockWeights(Function &F); void findEquivalenceClasses(Function &F); + template void findEquivalencesFor(BasicBlock *BB1, ArrayRef Descendants, - DominatorTreeBase *DomTree); + DominatorTreeBase *DomTree); + void propagateWeights(Function &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); @@ -217,7 +219,7 @@ class SampleProfileLoader { /// \brief Dominance, post-dominance and loop information. std::unique_ptr DT; - std::unique_ptr> PDT; + std::unique_ptr> PDT; std::unique_ptr LI; AssumptionCacheTracker *ACT; @@ -773,9 +775,10 @@ bool SampleProfileLoader::inlineHotFunctions( /// \param DomTree Opposite dominator tree. 
If \p Descendants is filled /// with blocks from \p BB1's dominator tree, then /// this is the post-dominator tree, and vice versa. +template void SampleProfileLoader::findEquivalencesFor( BasicBlock *BB1, ArrayRef Descendants, - DominatorTreeBase *DomTree) { + DominatorTreeBase *DomTree) { const BasicBlock *EC = EquivalenceClass[BB1]; uint64_t Weight = BlockWeights[EC]; for (const auto *BB2 : Descendants) { @@ -1283,7 +1286,7 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) { DT.reset(new DominatorTree); DT->recalculate(F); - PDT.reset(new DominatorTreeBase(true)); + PDT.reset(new PostDomTreeBase()); PDT->recalculate(F); LI.reset(new LoopInfo); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 773c86e23707..fdc9c373b95e 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1284,6 +1284,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); + if (match(Op1, m_One())) { + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + Value *X; + if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X))))) { + Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0)); + return new ZExtInst(IsZero, I.getType()); + } + } + if (ConstantInt *AndRHS = dyn_cast(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -1315,23 +1325,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { break; } - case Instruction::Sub: - // -x & 1 -> x & 1 - if (AndRHSMask.isOneValue() && match(Op0LHS, m_Zero())) - return BinaryOperator::CreateAnd(Op0RHS, AndRHS); - - break; - - case Instruction::Shl: - case Instruction::LShr: - // (1 << x) & 1 --> zext(x == 0) - // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { - Value 
*NewICmp = - Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); - return new ZExtInst(NewICmp, I.getType()); - } - break; } // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth @@ -1417,12 +1410,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // (A&((~A)|B)) -> A&B - if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A)))) - return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A)))) - return BinaryOperator::CreateAnd(A, Op0); - // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) @@ -2020,18 +2007,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *A, *B; - // ((~A & B) | A) -> (A | B) - if (match(Op0, m_c_And(m_Not(m_Specific(Op1)), m_Value(A)))) - return BinaryOperator::CreateOr(A, Op1); - if (match(Op1, m_c_And(m_Not(m_Specific(Op0)), m_Value(A)))) - return BinaryOperator::CreateOr(Op0, A); - - // ((A & B) | ~A) -> (~A | B) - // The NOT is guaranteed to be in the RHS by complexity ordering. - if (match(Op1, m_Not(m_Value(A))) && - match(Op0, m_c_And(m_Specific(A), m_Value(B)))) - return BinaryOperator::CreateOr(Op1, B); - // (A & C)|(B & D) Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && @@ -2176,17 +2151,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Not, Op0); } - // (A & B) | (~A ^ B) -> (~A ^ B) - // (A & B) | (B ^ ~A) -> (~A ^ B) - // (B & A) | (~A ^ B) -> (~A ^ B) - // (B & A) | (B ^ ~A) -> (~A ^ B) - // The match order is important: match the xor first because the 'not' - // operation defines 'A'. We do not need to match the xor as Op0 because the - // xor was canonicalized to Op1 above. 
- if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && - match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder.CreateNot(A), B); - if (SwappedForXor) std::swap(Op0, Op1); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 60d1cde971dd..a8faaecb5c34 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1814,9 +1814,21 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); Value *CmpQ = Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); - auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And - : Instruction::Or; - return BinaryOperator::Create(LogicOpc, CmpP, CmpQ); + auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + return BinaryOperator::Create(BOpc, CmpP, CmpQ); + } + + // Are we using xors to bitwise check for a pair of (in)equalities? Convert to + // a shorter form that has more potential to be folded even further. + Value *X1, *X2, *X3, *X4; + if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) && + match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) { + // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4) + // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4) + Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2); + Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4); + auto BOpc = Pred == CmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or; + return BinaryOperator::Create(BOpc, Cmp12, Cmp34); } return nullptr; @@ -3737,6 +3749,11 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, const APInt &CVal = CI->getValue(); if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) return nullptr; + } else { + // In this case we could have the operand of the binary operation + // being defined in another block, and performing the replacement + // could break the dominance relation. + return nullptr; } } else { // Other uses prohibit this transformation. @@ -3856,18 +3873,17 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, } else if (BinaryOperator *BO = dyn_cast(U)) { assert(BO->getOpcode() == Instruction::And); // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) - Value *ShortMask = - Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth)); + ConstantInt *CI = cast(BO->getOperand(1)); + APInt ShortMask = CI->getValue().trunc(MulWidth); Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); - Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType()); - if (auto *ZextI = dyn_cast(Zext)) - IC.Worklist.Add(ZextI); + Instruction *Zext = + cast(Builder.CreateZExt(ShortAnd, BO->getType())); + IC.Worklist.Add(Zext); IC.replaceInstUsesWith(*BO, Zext); } else { llvm_unreachable("Unexpected Binary operation"); } - if (auto *UI = dyn_cast(U)) - IC.Worklist.Add(UI); + IC.Worklist.Add(cast(U)); } } if (isa(OtherVal)) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c59e1ce69ac2..451036545741 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -998,8 +998,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // that this code is not reachable. 
We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); + StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + SI->setDebugLoc(LI.getDebugLoc()); return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 5689c0604239..a20f474cbf40 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -417,8 +417,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // the highest demanded bit, we just return the other side. if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - // We can't do this with the LHS for subtraction. - if (I->getOpcode() == Instruction::Add && + // We can't do this with the LHS for subtraction, unless we are only + // demanding the LSB. 
+ if ((I->getOpcode() == Instruction::Add || + DemandedFromOps.isOneValue()) && DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 90e232399155..c7766568fd9d 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -636,17 +636,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); + Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I)); + // Do "A op C" and "B op C" both simplify? - if (Value *L = - SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) - if (Value *R = - SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) { - // They do! Return "L op' R". - ++NumExpand; - C = Builder.CreateBinOp(InnerOpcode, L, R); - C->takeName(&I); - return C; - } + if (L && R) { + // They do! Return "L op' R". + ++NumExpand; + C = Builder.CreateBinOp(InnerOpcode, L, R); + C->takeName(&I); + return C; + } + + // Does "A op C" simplify to the identity value for the inner opcode? + if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { + // They do! Return "B op C". + ++NumExpand; + C = Builder.CreateBinOp(TopLevelOpcode, B, C); + C->takeName(&I); + return C; + } + + // Does "B op C" simplify to the identity value for the inner opcode? + if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { + // They do! Return "A op C". 
+ ++NumExpand; + C = Builder.CreateBinOp(TopLevelOpcode, A, C); + C->takeName(&I); + return C; + } } if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) { @@ -655,17 +673,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' + Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I)); + Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); + // Do "A op B" and "A op C" both simplify? - if (Value *L = - SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I))) - if (Value *R = - SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) { - // They do! Return "L op' R". - ++NumExpand; - A = Builder.CreateBinOp(InnerOpcode, L, R); - A->takeName(&I); - return A; - } + if (L && R) { + // They do! Return "L op' R". + ++NumExpand; + A = Builder.CreateBinOp(InnerOpcode, L, R); + A->takeName(&I); + return A; + } + + // Does "A op B" simplify to the identity value for the inner opcode? + if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) { + // They do! Return "A op C". + ++NumExpand; + A = Builder.CreateBinOp(TopLevelOpcode, A, C); + A->takeName(&I); + return A; + } + + // Does "A op C" simplify to the identity value for the inner opcode? + if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) { + // They do! Return "A op B". 
+ ++NumExpand; + A = Builder.CreateBinOp(TopLevelOpcode, A, B); + A->takeName(&I); + return A; + } } // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0)) diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 184940b7ea58..057f746e052d 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -22,9 +22,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,6 +45,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" @@ -192,6 +195,11 @@ static cl::opt ClMaxInlinePoisoningSize( static cl::opt ClUseAfterReturn("asan-use-after-return", cl::desc("Check stack-use-after-return"), cl::Hidden, cl::init(true)); +static cl::opt ClRedzoneByvalArgs("asan-redzone-byval-args", + cl::desc("Create redzones for byval " + "arguments (extra copy " + "required)"), cl::Hidden, + cl::init(true)); static cl::opt ClUseAfterScope("asan-use-after-scope", cl::desc("Check stack-use-after-scope"), cl::Hidden, cl::init(false)); @@ -747,6 +755,9 @@ struct FunctionStackPoisoner : public InstVisitor { bool runOnFunction() { if (!ClStack) return false; + + if (ClRedzoneByvalArgs) copyArgsPassedByValToAllocas(); + // Collect alloca, ret, lifetime instructions etc. 
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); @@ -763,6 +774,11 @@ struct FunctionStackPoisoner : public InstVisitor { return true; } + // Arguments marked with the "byval" attribute are implicitly copied without + // using an alloca instruction. To produce redzones for those arguments, we + // copy them a second time into memory allocated with an alloca instruction. + void copyArgsPassedByValToAllocas(); + // Finds all Alloca instructions and puts // poisoned red zones around all of them. // Then unpoison everything back before the function returns. @@ -2528,6 +2544,28 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { llvm_unreachable("impossible LocalStackSize"); } +void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { + BasicBlock &FirstBB = *F.begin(); + IRBuilder<> IRB(&FirstBB, FirstBB.getFirstInsertionPt()); + const DataLayout &DL = F.getParent()->getDataLayout(); + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) { + Type *Ty = Arg.getType()->getPointerElementType(); + unsigned Align = Arg.getParamAlignment(); + if (Align == 0) Align = DL.getABITypeAlignment(Ty); + + const std::string &Name = Arg.hasName() ? 
Arg.getName().str() : + "Arg" + llvm::to_string(Arg.getArgNo()); + AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AI->setAlignment(Align); + Arg.replaceAllUsesWith(AI); + + uint64_t AllocSize = DL.getTypeAllocSize(Ty); + IRB.CreateMemCpy(AI, &Arg, AllocSize, Align); + } + } +} + PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 1348e0ed0ed0..b7c6271869cd 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3039,7 +3039,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); @@ -3053,7 +3053,7 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { - if (F.getCallingConv() == CallingConv::X86_64_Win64) + if (F.getCallingConv() == CallingConv::Win64) return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index e3c36c98ab0d..06fe07598374 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -281,6 +281,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTraceSwitchFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy)); + // Make sure smaller parameters are zero-extended to i64 as required by the + // x86_64 ABI. 
+ if (TargetTriple.getArch() == Triple::x86_64) { + for (int i = 0; i < 3; i++) { + SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt); + SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt); + } + SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt); + } + // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 7fd77a082b82..c5c9b2c185d6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -562,13 +562,27 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration, if (!MSSA) return false; + // If MemorySSA has determined that one of EarlierInst or LaterInst does not + // read/write memory, then we can safely return true here. + // FIXME: We could be more aggressive when checking doesNotAccessMemory(), + // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass + // by also checking the MemorySSA MemoryAccess on the instruction. Initial + // experiments suggest this isn't worthwhile, at least for C/C++ code compiled + // with the default optimization pipeline. + auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst); + if (!EarlierMA) + return true; + auto *LaterMA = MSSA->getMemoryAccess(LaterInst); + if (!LaterMA) + return true; + // Since we know LaterDef dominates LaterInst and EarlierInst dominates // LaterInst, if LaterDef dominates EarlierInst then it can't occur between // EarlierInst and LaterInst and neither can any other write that potentially // clobbers LaterInst. 
MemoryAccess *LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); - return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst)); + return MSSA->dominates(LaterDef, EarlierMA); } bool EarlyCSE::processNode(DomTreeNode *Node) { diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 0fe72f3f7331..ea28705e684d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1168,6 +1168,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, LI->isVolatile(), LI->getAlignment(), LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); + NewLoad->setDebugLoc(LI->getDebugLoc()); // Transfer the old load's AA tags to the new load. AAMDNodes Tags; diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index a40c22c3fce9..99b4458ea0fa 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -805,6 +805,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP ConstantInt *One = ConstantInt::get(IndVarTy, 1); // TODO: generalize the predicates here to also match their unsigned variants. if (IsIncreasing) { + bool DecreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (++i != len) { while (++i < len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SLT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMin(SE, RightSCEV)) { + // while (true) { while (true) { + // if (++i == len) ---> if (++i > len - 1) + // break; break; + // ... ... 
+ // } } + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); @@ -829,16 +848,41 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateAdd(RightValue, One); + // We need to increase the right value unless we have already decreased + // it virtually when we replaced EQ with SGT. + if (!DecreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateAdd(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by upper limit"; return None; } + assert(!DecreasedRightValueByOne && + "Right value can be decreased only for LatchBrExitIdx == 0!"); } } else { + bool IncreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (--i != len) { while (--i > len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SGT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMax(SE, RightSCEV)) { + // while (true) { while (true) { + // if (--i == len) ---> if (--i < len + 1) + // break; break; + // ... ... 
+ // } } + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); @@ -863,14 +907,20 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateSub(RightValue, One); + // We need to decrease the right value unless we have already increased + // it virtually when we replaced EQ with SLT. + if (!IncreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateSub(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by lower limit"; return None; } + assert(!IncreasedRightValueByOne && + "Right value can be increased only for LatchBrExitIdx == 0!"); } } @@ -922,14 +972,18 @@ LoopConstrainer::calculateSubRanges() const { bool Increasing = MainLoopStructure.IndVarIncreasing; - // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the - // range of values the induction variable takes. + // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or + // [Smallest, GreatestSeen] is the range of values the induction variable + // takes. - const SCEV *Smallest = nullptr, *Greatest = nullptr; + const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr; + const SCEV *One = SE.getOne(Ty); if (Increasing) { Smallest = Start; Greatest = End; + // No overflow, because the range [Smallest, GreatestSeen] is not empty. + GreatestSeen = SE.getMinusSCEV(End, One); } else { // These two computations may sign-overflow. Here is why that is okay: // @@ -947,9 +1001,9 @@ LoopConstrainer::calculateSubRanges() const { // will be an empty range. 
Returning an empty range is always safe. // - const SCEV *One = SE.getOne(Ty); Smallest = SE.getAddExpr(End, One); Greatest = SE.getAddExpr(Start, One); + GreatestSeen = Start; } auto Clamp = [this, Smallest, Greatest](const SCEV *S) { @@ -964,7 +1018,7 @@ LoopConstrainer::calculateSubRanges() const { Result.LowLimit = Clamp(Range.getBegin()); bool ProvablyNoPostLoop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); + SE.isKnownPredicate(ICmpInst::ICMP_SLT, GreatestSeen, Range.getEnd()); if (!ProvablyNoPostLoop) Result.HighLimit = Clamp(Range.getEnd()); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index ee3de51b1360..4056cc5cb346 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2168,11 +2168,19 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { return false; } -/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form +/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the +/// same BB in the form /// bb: /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ... -/// %s = select p, trueval, falseval +/// %s = select %p, trueval, falseval /// +/// or +/// +/// bb: +/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ... +/// %c = cmp %p, 0 +/// %s = select %c, trueval, falseval +// /// And expand the select into a branch structure. This later enables /// jump-threading over bb in this pass. /// @@ -2186,44 +2194,54 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { if (LoopHeaders.count(BB)) return false; - // Look for a Phi/Select pair in the same basic block. The Phi feeds the - // condition of the Select and at least one of the incoming values is a - // constant. 
for (BasicBlock::iterator BI = BB->begin(); PHINode *PN = dyn_cast(BI); ++BI) { - unsigned NumPHIValues = PN->getNumIncomingValues(); - if (NumPHIValues == 0 || !PN->hasOneUse()) + // Look for a Phi having at least one constant incoming value. + if (llvm::all_of(PN->incoming_values(), + [](Value *V) { return !isa(V); })) continue; - SelectInst *SI = dyn_cast(PN->user_back()); - if (!SI || SI->getParent() != BB) - continue; - - Value *Cond = SI->getCondition(); - if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1)) - continue; - - bool HasConst = false; - for (unsigned i = 0; i != NumPHIValues; ++i) { - if (PN->getIncomingBlock(i) == BB) + auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) { + // Check if SI is in BB and use V as condition. + if (SI->getParent() != BB) return false; - if (isa(PN->getIncomingValue(i))) - HasConst = true; + Value *Cond = SI->getCondition(); + return (Cond && Cond == V && Cond->getType()->isIntegerTy(1)); + }; + + SelectInst *SI = nullptr; + for (Use &U : PN->uses()) { + if (ICmpInst *Cmp = dyn_cast(U.getUser())) { + // Look for a ICmp in BB that compares PN with a constant and is the + // condition of a Select. + if (Cmp->getParent() == BB && Cmp->hasOneUse() && + isa(Cmp->getOperand(1 - U.getOperandNo()))) + if (SelectInst *SelectI = dyn_cast(Cmp->user_back())) + if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) { + SI = SelectI; + break; + } + } else if (SelectInst *SelectI = dyn_cast(U.getUser())) { + // Look for a Select in BB that uses PN as condtion. + if (isUnfoldCandidate(SelectI, U.get())) { + SI = SelectI; + break; + } + } } - if (HasConst) { - // Expand the select. 
- TerminatorInst *Term = - SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); - PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); - NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); - NewPN->addIncoming(SI->getFalseValue(), BB); - SI->replaceAllUsesWith(NewPN); - SI->eraseFromParent(); - return true; - } + if (!SI) + continue; + // Expand the select. + TerminatorInst *Term = + SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); + NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); + NewPN->addIncoming(SI->getFalseValue(), BB); + SI->replaceAllUsesWith(NewPN); + SI->eraseFromParent(); + return true; } - return false; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 606136dc31a4..2e0d8e0374c0 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -323,9 +324,10 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { class LoopInterchangeLegality { public: LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) + LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, + OptimizationRemarkEmitter *ORE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} + PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {} /// Check if the loops can be interchanged. 
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -353,6 +355,8 @@ class LoopInterchangeLegality { LoopInfo *LI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; bool InnerLoopHasReduction; }; @@ -361,8 +365,9 @@ class LoopInterchangeLegality { /// loop. class LoopInterchangeProfitability { public: - LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} + LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loop interchange is profitable. bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, @@ -376,6 +381,8 @@ class LoopInterchangeProfitability { /// Scev analysis. ScalarEvolution *SE; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; }; /// LoopInterchangeTransform interchanges the loop. @@ -422,6 +429,9 @@ struct LoopInterchange : public FunctionPass { DependenceInfo *DI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; + LoopInterchange() : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); @@ -435,6 +445,7 @@ struct LoopInterchange : public FunctionPass { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); + AU.addRequired(); } bool runOnFunction(Function &F) override { @@ -446,6 +457,7 @@ struct LoopInterchange : public FunctionPass { DI = &getAnalysis().getDI(); auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + ORE = &getAnalysis().getORE(); PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Build up a worklist of loop pairs to analyze. 
@@ -575,18 +587,23 @@ struct LoopInterchange : public FunctionPass { Loop *OuterLoop = LoopList[OuterLoopId]; LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, - PreserveLCSSA); + PreserveLCSSA, ORE); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); return false; } DEBUG(dbgs() << "Loops are legal to interchange\n"); - LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); + LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { DEBUG(dbgs() << "Interchanging loops not profitable\n"); return false; } + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Loop interchanged with enclosing loop."); + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, LIL.hasInnerLoopReduction()); LIT.transform(); @@ -760,6 +777,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "UnsupportedPHIInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."); return true; } @@ -767,6 +790,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (Inductions.size() != 1) { DEBUG(dbgs() << "We currently only support loops with 1 induction variable." 
<< "Failed to interchange due to current limitation\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "MultiInductionInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with 1 induction variable can be " + "interchanged currently."); return true; } if (Reductions.size() > 0) @@ -777,6 +806,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "UnsupportedPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with induction or reduction PHI nodes can be" + " interchanged currently."); return true; } @@ -785,18 +820,35 @@ bool LoopInterchangeLegality::currentLimitations() { if (!Reductions.empty()) { DEBUG(dbgs() << "Outer loops with reductions are not supported " << "currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "ReductionsOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Outer loops with reductions cannot be interchangeed " + "currently."); return true; } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { DEBUG(dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "MultiIndutionOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with 1 induction variable can be " + "interchanged currently."); return true; } // TODO: Triangular loops are not handled for now. 
if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "UnsupportedStructureInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Inner loop structure not understood currently."); return true; } @@ -805,12 +857,24 @@ bool LoopInterchangeLegality::currentLimitations() { getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NoLCSSAPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with LCSSA PHIs can be interchange " + "currently."); return true; } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NoLCSSAPHIOuterInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with LCSSA PHIs can be interchange " + "currently."); return true; } @@ -835,6 +899,11 @@ bool LoopInterchangeLegality::currentLimitations() { if (!InnerIndexVarInc) { DEBUG(dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NoIncrementInInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "The inner loop does not increment the induction variable."); return true; } @@ -852,6 +921,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (!I.isIdenticalTo(InnerIndexVarInc)) { DEBUG(dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "UnsupportedInsBetweenInduction", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) 
+ << "Found unsupported instruction between induction variable " + "increment and branch."); return true; } @@ -862,6 +937,11 @@ bool LoopInterchangeLegality::currentLimitations() { // current limitation. if (!FoundInduction) { DEBUG(dbgs() << "Did not find the induction variable.\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NoIndutionVariable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Did not find the induction variable."); return true; } return false; @@ -875,6 +955,11 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "Dependence", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops due to dependences."); return false; } @@ -910,6 +995,12 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, // Check if the loops are tightly nested. if (!tightlyNested(OuterLoop, InnerLoop)) { DEBUG(dbgs() << "Loops not tightly nested\n"); + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "NotTightlyNested", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops because they are not tightly " + "nested."); return false; } @@ -1005,9 +1096,18 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. 
- bool ImprovesPar = - isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix); - return ImprovesPar; + if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) + return true; + + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, + "InterchangeNotProfitable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Interchanging loops is too costly (cost=" + << ore::NV("Cost", Cost) << ", threshold=" + << ore::NV("Threshold", LoopInterchangeCostThreshold) << + ") and it does not improve parallelism."); + return false; } void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, @@ -1291,6 +1391,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 9397b87cdf56..90c5c243f464 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" @@ -90,16 +91,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { // Because of PR962, we don't TRE dynamic allocas. 
- for (auto &BB : F) { - for (auto &I : BB) { - if (AllocaInst *AI = dyn_cast(&I)) { - if (!AI->isStaticAlloca()) - return false; - } - } - } - - return true; + return llvm::all_of(instructions(F), [](Instruction &I) { + auto *AI = dyn_cast(&I); + return !AI || AI->isStaticAlloca(); + }); } namespace { diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 5170c68e2915..d43ce7abb7cd 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -736,7 +737,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. Also update the dominator info for - // OtherExits, since we have new edges into OtherExits. + // OtherExits and their immediate successors, since we have new edges into + // OtherExits. + SmallSet ImmediateSuccessorsOfExitBlocks; for (auto *BB : OtherExits) { for (auto &II : *BB) { @@ -759,12 +762,35 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, cast(VMap[Phi->getIncomingBlock(i)])); } } +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + for (BasicBlock *SuccBB : successors(BB)) { + assert(!(any_of(OtherExits, + [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || + SuccBB == LatchExit) && + "Breaks the definition of dedicated exits!"); + } +#endif // Update the dominator info because the immediate dominator is no longer the // header of the original Loop. BB has edges both from L and remainder code. 
// Since the preheader determines which loop is run (L or directly jump to // the remainder code), we set the immediate dominator as the preheader. - if (DT) + if (DT) { DT->changeImmediateDominator(BB, PreHeader); + // Also update the IDom for immediate successors of BB. If the current + // IDom is the header, update the IDom to be the preheader because that is + // the nearest common dominator of all predecessors of SuccBB. We need to + // check for IDom being the header because successors of exit blocks can + // have edges from outside the loop, and we should not incorrectly update + // the IDom in that case. + for (BasicBlock *SuccBB: successors(BB)) + if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { + if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { + assert(!SuccBB->getSinglePredecessor() && + "BB should be the IDom then!"); + DT->changeImmediateDominator(SuccBB, PreHeader); + } + } + } } // Loop structure should be the following: diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index eb82ee283d44..012b10c8a9b0 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -574,11 +574,9 @@ class InnerLoopVectorizer { /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); - /// Emit a bypass check to see if the trip count would overflow, or we - /// wouldn't have enough iterations to execute one vector loop. + /// Emit a bypass check to see if the vector trip count is zero, including if + /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); - /// Emit a bypass check to see if the vector trip count is nonzero. - void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass); /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. 
void emitSCEVChecks(Loop *L, BasicBlock *Bypass); @@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, BasicBlock *BB = L->getLoopPreheader(); IRBuilder<> Builder(BB->getTerminator()); - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. - Value *CheckMinIters = Builder.CreateICmpULT( - Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); + // Generate code to check if the loop's trip count is less than VF * UF, or + // equal to it in case a scalar epilogue is required; this implies that the + // vector trip count is zero. This check also covers the case where adding one + // to the backedge-taken count overflowed leading to an incorrect trip count + // of zero. In this case we will also jump to the scalar loop. + auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE + : ICmpInst::ICMP_ULT; + Value *CheckMinIters = Builder.CreateICmp( + P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); - BasicBlock *NewBB = - BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked"); - // Update dominator tree immediately if the generated block is a - // LoopBypassBlock because SCEV expansions to generate loop bypass - // checks may query it before the current function is finished. - DT->addNewBlock(NewBB, BB); - if (L->getParentLoop()) - L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); - ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, CheckMinIters)); - LoopBypassBlocks.push_back(BB); -} - -void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, - BasicBlock *Bypass) { - Value *TC = getOrCreateVectorTripCount(L); - BasicBlock *BB = L->getLoopPreheader(); - IRBuilder<> Builder(BB->getTerminator()); - - // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. 
- Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()), - "cmp.zero"); - - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); // Update dominator tree immediately if the generated block is a // LoopBypassBlock because SCEV expansions to generate loop bypass @@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, if (L->getParentLoop()) L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); ReplaceInstWithInst(BB->getTerminator(), - BranchInst::Create(Bypass, NewBB, Cmp)); + BranchInst::Create(Bypass, NewBB, CheckMinIters)); LoopBypassBlocks.push_back(BB); } @@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *StartIdx = ConstantInt::get(IdxTy, 0); - // We need to test whether the backedge-taken count is uint##_max. Adding one - // to it will cause overflow and an incorrect loop trip count in the vector - // body. In case of overflow we want to directly jump to the scalar remainder - // loop. - emitMinimumIterationCountCheck(Lp, ScalarPH); // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. - emitVectorLoopEnteredCheck(Lp, ScalarPH); + // jump to the scalar loop. This check also covers the case where the + // backedge-taken count is uint##_max: adding one to it will overflow leading + // to an incorrect trip count of zero. In this (rare) case we will also jump + // to the scalar loop. + emitMinimumIterationCountCheck(Lp, ScalarPH); + // Generate the code to check any assumptions that we've made for SCEV // expressions. emitSCEVChecks(Lp, ScalarPH); @@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() { // We know what the end value is. 
EndValue = CountRoundDown; } else { - IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); + IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = CastInst::getCastOpcode(CountRoundDown, true, StepType, true); @@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); + Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); // This is the vector-clone of the value that leaves the loop. Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType(); diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4425043ad39a..dcbcab459a6b 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -434,7 +434,7 @@ class BoUpSLP { /// \returns the pointer to the vectorized value if \p VL is already /// vectorized, or NULL. They may happen in cycles. - Value *alreadyVectorized(ArrayRef VL) const; + Value *alreadyVectorized(ArrayRef VL, Value *OpValue) const; /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. @@ -857,7 +857,7 @@ class BoUpSLP { /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP); + bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, Value *OpValue); /// Un-bundles a group of instructions. 
void cancelScheduling(ArrayRef VL, Value *OpValue); @@ -1212,7 +1212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check that all of the users of the scalars that we want to vectorize are // schedulable. Instruction *VL0 = cast(VL[0]); - BasicBlock *BB = cast(VL0)->getParent(); + BasicBlock *BB = VL0->getParent(); if (!DT->isReachableFromEntry(BB)) { // Don't go into unreachable blocks. They may contain instructions with @@ -1237,7 +1237,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this)) { + if (!BS.tryScheduleBundle(VL, this, VL0)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL[0]) || !BS.getScheduleData(VL[0])->isPartOfBundle()) && @@ -2427,8 +2427,8 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { return Vec; } -Value *BoUpSLP::alreadyVectorized(ArrayRef VL) const { - if (const TreeEntry *En = getTreeEntry(VL[0])) { +Value *BoUpSLP::alreadyVectorized(ArrayRef VL, Value *OpValue) const { + if (const TreeEntry *En = getTreeEntry(OpValue)) { if (En->isSame(VL) && En->VectorizedValue) return En->VectorizedValue; } @@ -2553,7 +2553,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *InVec = vectorizeTree(INVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; CastInst *CI = dyn_cast(VL0); @@ -2575,7 +2575,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *L = vectorizeTree(LHSV); Value *R = vectorizeTree(RHSV); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; CmpInst::Predicate P0 = cast(VL0)->getPredicate(); @@ -2604,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *True = vectorizeTree(TrueVec); Value *False = vectorizeTree(FalseVec); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; Value 
*V = Builder.CreateSelect(Cond, True, False); @@ -2644,7 +2644,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; BinaryOperator *BinOp = cast(VL0); @@ -2806,7 +2806,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); - if (Value *V = alreadyVectorized(E->Scalars)) + if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; // Create a vector of LHS op1 RHS @@ -3097,8 +3097,8 @@ void BoUpSLP::optimizeGatherSequence() { // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - BoUpSLP *SLP) { - if (isa(VL[0])) + BoUpSLP *SLP, Value *OpValue) { + if (isa(OpValue)) return true; // Initialize the instruction bundle. @@ -3106,7 +3106,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, ScheduleData *PrevInBundle = nullptr; ScheduleData *Bundle = nullptr; bool ReSchedule = false; - DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n"); + DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n"); // Make sure that the scheduling region contains all // instructions of the bundle. 
@@ -3177,7 +3177,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, } } if (!Bundle->isReady()) { - cancelScheduling(VL, VL[0]); + cancelScheduling(VL, OpValue); return false; } return true; diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h index 09f4403556c8..b35f436e91b6 100644 --- a/contrib/llvm/tools/clang/include/clang-c/Index.h +++ b/contrib/llvm/tools/clang/include/clang-c/Index.h @@ -3205,7 +3205,7 @@ enum CXCallingConv { CXCallingConv_AAPCS_VFP = 7, CXCallingConv_X86RegCall = 8, CXCallingConv_IntelOclBicc = 9, - CXCallingConv_X86_64Win64 = 10, + CXCallingConv_Win64 = 10, CXCallingConv_X86_64SysV = 11, CXCallingConv_X86VectorCall = 12, CXCallingConv_Swift = 13, diff --git a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h index 3b46d31458ce..703f588c5663 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ASTContext.h @@ -1441,6 +1441,10 @@ class ASTContext : public RefCountedBase { /// The sizeof operator requires this (C99 6.5.3.4p4). CanQualType getSizeType() const; + /// \brief Return the unique signed counterpart of + /// the integer type corresponding to size_t. + CanQualType getSignedSizeType() const; + /// \brief Return the unique type for "intmax_t" (C99 7.18.1.5), defined in /// . 
CanQualType getIntMaxType() const; diff --git a/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h b/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h index 26c0cbe82d17..1cd6e004f751 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h +++ b/contrib/llvm/tools/clang/include/clang/AST/DeclObjC.h @@ -1039,10 +1039,9 @@ class ObjCContainerDecl : public NamedDecl, public DeclContext { typedef llvm::DenseMap, ObjCPropertyDecl*> PropertyMap; - - typedef llvm::DenseMap - ProtocolPropertyMap; - + + typedef llvm::SmallDenseSet ProtocolPropertySet; + typedef llvm::SmallVector PropertyDeclOrder; /// This routine collects list of properties to be implemented in the class. @@ -2159,7 +2158,8 @@ class ObjCProtocolDecl : public ObjCContainerDecl, PropertyDeclOrder &PO) const override; void collectInheritedProtocolProperties(const ObjCPropertyDecl *Property, - ProtocolPropertyMap &PM) const; + ProtocolPropertySet &PS, + PropertyDeclOrder &PO) const; static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == ObjCProtocol; } diff --git a/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h b/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h index 14e73819f53d..a1cae8e18f84 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h +++ b/contrib/llvm/tools/clang/include/clang/AST/OpenMPClause.h @@ -1890,6 +1890,217 @@ class OMPReductionClause final } }; +/// This represents clause 'task_reduction' in the '#pragma omp taskgroup' +/// directives. +/// +/// \code +/// #pragma omp taskgroup task_reduction(+:a,b) +/// \endcode +/// In this example directive '#pragma omp taskgroup' has clause +/// 'task_reduction' with operator '+' and the variables 'a' and 'b'. 
+/// +class OMPTaskReductionClause final + : public OMPVarListClause, + public OMPClauseWithPostUpdate, + private llvm::TrailingObjects { + friend TrailingObjects; + friend OMPVarListClause; + friend class OMPClauseReader; + /// Location of ':'. + SourceLocation ColonLoc; + /// Nested name specifier for C++. + NestedNameSpecifierLoc QualifierLoc; + /// Name of custom operator. + DeclarationNameInfo NameInfo; + + /// Build clause with number of variables \a N. + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + /// \param ColonLoc Location of ':'. + /// \param N Number of the variables in the clause. + /// \param QualifierLoc The nested-name qualifier with location information + /// \param NameInfo The full name info for reduction identifier. + /// + OMPTaskReductionClause(SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation ColonLoc, SourceLocation EndLoc, + unsigned N, NestedNameSpecifierLoc QualifierLoc, + const DeclarationNameInfo &NameInfo) + : OMPVarListClause(OMPC_task_reduction, StartLoc, + LParenLoc, EndLoc, N), + OMPClauseWithPostUpdate(this), ColonLoc(ColonLoc), + QualifierLoc(QualifierLoc), NameInfo(NameInfo) {} + + /// Build an empty clause. + /// + /// \param N Number of variables. + /// + explicit OMPTaskReductionClause(unsigned N) + : OMPVarListClause( + OMPC_task_reduction, SourceLocation(), SourceLocation(), + SourceLocation(), N), + OMPClauseWithPostUpdate(this), ColonLoc(), QualifierLoc(), NameInfo() {} + + /// Sets location of ':' symbol in clause. + void setColonLoc(SourceLocation CL) { ColonLoc = CL; } + /// Sets the name info for specified reduction identifier. + void setNameInfo(DeclarationNameInfo DNI) { NameInfo = DNI; } + /// Sets the nested name specifier. + void setQualifierLoc(NestedNameSpecifierLoc NSL) { QualifierLoc = NSL; } + + /// Set list of helper expressions, required for proper codegen of the clause. 
+ /// These expressions represent private copy of the reduction variable. + void setPrivates(ArrayRef Privates); + + /// Get the list of helper privates. + MutableArrayRef getPrivates() { + return MutableArrayRef(varlist_end(), varlist_size()); + } + ArrayRef getPrivates() const { + return llvm::makeArrayRef(varlist_end(), varlist_size()); + } + + /// Set list of helper expressions, required for proper codegen of the clause. + /// These expressions represent LHS expression in the final reduction + /// expression performed by the reduction clause. + void setLHSExprs(ArrayRef LHSExprs); + + /// Get the list of helper LHS expressions. + MutableArrayRef getLHSExprs() { + return MutableArrayRef(getPrivates().end(), varlist_size()); + } + ArrayRef getLHSExprs() const { + return llvm::makeArrayRef(getPrivates().end(), varlist_size()); + } + + /// Set list of helper expressions, required for proper codegen of the clause. + /// These expressions represent RHS expression in the final reduction + /// expression performed by the reduction clause. Also, variables in these + /// expressions are used for proper initialization of reduction copies. + void setRHSExprs(ArrayRef RHSExprs); + + /// Get the list of helper destination expressions. + MutableArrayRef getRHSExprs() { + return MutableArrayRef(getLHSExprs().end(), varlist_size()); + } + ArrayRef getRHSExprs() const { + return llvm::makeArrayRef(getLHSExprs().end(), varlist_size()); + } + + /// Set list of helper reduction expressions, required for proper + /// codegen of the clause. These expressions are binary expressions or + /// operator/custom reduction call that calculates new value from source + /// helper expressions to destination helper expressions. + void setReductionOps(ArrayRef ReductionOps); + + /// Get the list of helper reduction expressions. 
+ MutableArrayRef getReductionOps() { + return MutableArrayRef(getRHSExprs().end(), varlist_size()); + } + ArrayRef getReductionOps() const { + return llvm::makeArrayRef(getRHSExprs().end(), varlist_size()); + } + +public: + /// Creates clause with a list of variables \a VL. + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param ColonLoc Location of ':'. + /// \param EndLoc Ending location of the clause. + /// \param VL The variables in the clause. + /// \param QualifierLoc The nested-name qualifier with location information + /// \param NameInfo The full name info for reduction identifier. + /// \param Privates List of helper expressions for proper generation of + /// private copies. + /// \param LHSExprs List of helper expressions for proper generation of + /// assignment operation required for copyprivate clause. This list represents + /// LHSs of the reduction expressions. + /// \param RHSExprs List of helper expressions for proper generation of + /// assignment operation required for copyprivate clause. This list represents + /// RHSs of the reduction expressions. + /// Also, variables in these expressions are used for proper initialization of + /// reduction copies. + /// \param ReductionOps List of helper expressions that represents reduction + /// expressions: + /// \code + /// LHSExprs binop RHSExprs; + /// operator binop(LHSExpr, RHSExpr); + /// (LHSExpr, RHSExpr); + /// \endcode + /// Required for proper codegen of final reduction operation performed by the + /// reduction clause. + /// \param PreInit Statement that must be executed before entering the OpenMP + /// region with this clause. + /// \param PostUpdate Expression that must be executed after exit from the + /// OpenMP region with this clause. 
+ /// + static OMPTaskReductionClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef VL, + NestedNameSpecifierLoc QualifierLoc, + const DeclarationNameInfo &NameInfo, ArrayRef Privates, + ArrayRef LHSExprs, ArrayRef RHSExprs, + ArrayRef ReductionOps, Stmt *PreInit, Expr *PostUpdate); + + /// Creates an empty clause with the place for \a N variables. + /// + /// \param C AST context. + /// \param N The number of variables. + /// + static OMPTaskReductionClause *CreateEmpty(const ASTContext &C, unsigned N); + + /// Gets location of ':' symbol in clause. + SourceLocation getColonLoc() const { return ColonLoc; } + /// Gets the name info for specified reduction identifier. + const DeclarationNameInfo &getNameInfo() const { return NameInfo; } + /// Gets the nested name specifier. + NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; } + + typedef MutableArrayRef::iterator helper_expr_iterator; + typedef ArrayRef::iterator helper_expr_const_iterator; + typedef llvm::iterator_range helper_expr_range; + typedef llvm::iterator_range + helper_expr_const_range; + + helper_expr_const_range privates() const { + return helper_expr_const_range(getPrivates().begin(), getPrivates().end()); + } + helper_expr_range privates() { + return helper_expr_range(getPrivates().begin(), getPrivates().end()); + } + helper_expr_const_range lhs_exprs() const { + return helper_expr_const_range(getLHSExprs().begin(), getLHSExprs().end()); + } + helper_expr_range lhs_exprs() { + return helper_expr_range(getLHSExprs().begin(), getLHSExprs().end()); + } + helper_expr_const_range rhs_exprs() const { + return helper_expr_const_range(getRHSExprs().begin(), getRHSExprs().end()); + } + helper_expr_range rhs_exprs() { + return helper_expr_range(getRHSExprs().begin(), getRHSExprs().end()); + } + helper_expr_const_range reduction_ops() const { + return 
helper_expr_const_range(getReductionOps().begin(), + getReductionOps().end()); + } + helper_expr_range reduction_ops() { + return helper_expr_range(getReductionOps().begin(), + getReductionOps().end()); + } + + child_range children() { + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == OMPC_task_reduction; + } +}; + /// \brief This represents clause 'linear' in the '#pragma omp ...' /// directives. /// diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h index 917b240428e7..e7f271cc0812 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3016,6 +3016,28 @@ RecursiveASTVisitor::VisitOMPReductionClause(OMPReductionClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPTaskReductionClause( + OMPTaskReductionClause *C) { + TRY_TO(TraverseNestedNameSpecifierLoc(C->getQualifierLoc())); + TRY_TO(TraverseDeclarationNameInfo(C->getNameInfo())); + TRY_TO(VisitOMPClauseList(C)); + TRY_TO(VisitOMPClauseWithPostUpdate(C)); + for (auto *E : C->privates()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->lhs_exprs()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->rhs_exprs()) { + TRY_TO(TraverseStmt(E)); + } + for (auto *E : C->reduction_ops()) { + TRY_TO(TraverseStmt(E)); + } + return true; +} + template bool RecursiveASTVisitor::VisitOMPFlushClause(OMPFlushClause *C) { TRY_TO(VisitOMPClauseList(C)); diff --git a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h index 463af06fddab..09dd87fdc8bc 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h +++ b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h @@ -1895,7 +1895,7 @@ class OMPTaskwaitDirective : public 
OMPExecutableDirective { } }; -/// \brief This represents '#pragma omp taskgroup' directive. +/// This represents '#pragma omp taskgroup' directive. /// /// \code /// #pragma omp taskgroup @@ -1903,39 +1903,45 @@ class OMPTaskwaitDirective : public OMPExecutableDirective { /// class OMPTaskgroupDirective : public OMPExecutableDirective { friend class ASTStmtReader; - /// \brief Build directive with the given start and end location. + /// Build directive with the given start and end location. /// /// \param StartLoc Starting location of the directive kind. /// \param EndLoc Ending location of the directive. + /// \param NumClauses Number of clauses. /// - OMPTaskgroupDirective(SourceLocation StartLoc, SourceLocation EndLoc) + OMPTaskgroupDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumClauses) : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup, - StartLoc, EndLoc, 0, 1) {} + StartLoc, EndLoc, NumClauses, 1) {} - /// \brief Build an empty directive. + /// Build an empty directive. + /// \param NumClauses Number of clauses. /// - explicit OMPTaskgroupDirective() + explicit OMPTaskgroupDirective(unsigned NumClauses) : OMPExecutableDirective(this, OMPTaskgroupDirectiveClass, OMPD_taskgroup, - SourceLocation(), SourceLocation(), 0, 1) {} + SourceLocation(), SourceLocation(), NumClauses, + 1) {} public: - /// \brief Creates directive. + /// Creates directive. /// /// \param C AST context. /// \param StartLoc Starting location of the directive kind. /// \param EndLoc Ending Location of the directive. + /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. 
/// - static OMPTaskgroupDirective *Create(const ASTContext &C, - SourceLocation StartLoc, - SourceLocation EndLoc, - Stmt *AssociatedStmt); + static OMPTaskgroupDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *AssociatedStmt); - /// \brief Creates an empty directive. + /// Creates an empty directive. /// /// \param C AST context. + /// \param NumClauses Number of clauses. /// - static OMPTaskgroupDirective *CreateEmpty(const ASTContext &C, EmptyShell); + static OMPTaskgroupDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell); static bool classof(const Stmt *T) { return T->getStmtClass() == OMPTaskgroupDirectiveClass; diff --git a/contrib/llvm/tools/clang/include/clang/AST/Type.h b/contrib/llvm/tools/clang/include/clang/AST/Type.h index 9eb6d81296d8..64bd3c701985 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Type.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Type.h @@ -3878,6 +3878,7 @@ class AttributedType : public Type, public llvm::FoldingSetNode { attr_sptr, attr_uptr, attr_nonnull, + attr_ns_returns_retained, attr_nullable, attr_null_unspecified, attr_objc_kindof, diff --git a/contrib/llvm/tools/clang/include/clang/Analysis/Analyses/Dominators.h b/contrib/llvm/tools/clang/include/clang/Analysis/Analyses/Dominators.h index 1229f8a8efac..38010e1ee1d8 100644 --- a/contrib/llvm/tools/clang/include/clang/Analysis/Analyses/Dominators.h +++ b/contrib/llvm/tools/clang/include/clang/Analysis/Analyses/Dominators.h @@ -38,15 +38,15 @@ typedef llvm::DomTreeNodeBase DomTreeNode; class DominatorTree : public ManagedAnalysis { virtual void anchor(); public: - llvm::DominatorTreeBase* DT; + llvm::DomTreeBase* DT; DominatorTree() { - DT = new llvm::DominatorTreeBase(false); + DT = new llvm::DomTreeBase(); } ~DominatorTree() override { delete DT; } - llvm::DominatorTreeBase& getBase() { return *DT; } + llvm::DomTreeBase& getBase() { return *DT; } /// \brief This method 
returns the root CFGBlock of the dominators tree. /// diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td index bc36fd8c8297..f13e13b0107b 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td @@ -1802,11 +1802,18 @@ def Target : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag>; let Documentation = [TargetDocs]; let AdditionalMembers = [{ - typedef std::pair, StringRef> ParsedTargetAttr; + struct ParsedTargetAttr { + std::vector Features; + StringRef Architecture; + bool DuplicateArchitecture = false; + }; ParsedTargetAttr parse() const { + return parse(getFeaturesStr()); + } + static ParsedTargetAttr parse(StringRef Features) { ParsedTargetAttr Ret; SmallVector AttrFeatures; - getFeaturesStr().split(AttrFeatures, ","); + Features.split(AttrFeatures, ","); // Grab the various features and prepend a "+" to turn on the feature to // the backend and add them to our existing set of features. @@ -1823,12 +1830,15 @@ def Target : InheritableAttr { continue; // While we're here iterating check for a different target cpu. 
- if (Feature.startswith("arch=")) - Ret.second = Feature.split("=").second.trim(); - else if (Feature.startswith("no-")) - Ret.first.push_back("-" + Feature.split("-").second.str()); + if (Feature.startswith("arch=")) { + if (!Ret.Architecture.empty()) + Ret.DuplicateArchitecture = true; + else + Ret.Architecture = Feature.split("=").second.trim(); + } else if (Feature.startswith("no-")) + Ret.Features.push_back("-" + Feature.split("-").second.str()); else - Ret.first.push_back("+" + Feature.str()); + Ret.Features.push_back("+" + Feature.str()); } return Ret; } diff --git a/contrib/llvm/tools/clang/include/clang/Basic/AttrDocs.td b/contrib/llvm/tools/clang/include/clang/Basic/AttrDocs.td index 2987f07d8bb4..33ef3ea4cade 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/AttrDocs.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/AttrDocs.td @@ -910,13 +910,13 @@ the function declaration for a hypothetical function ``f``: void f(void) __attribute__((availability(macos,introduced=10.4,deprecated=10.6,obsoleted=10.7))); -The availability attribute states that ``f`` was introduced in Mac OS X 10.4, -deprecated in Mac OS X 10.6, and obsoleted in Mac OS X 10.7. This information +The availability attribute states that ``f`` was introduced in macOS 10.4, +deprecated in macOS 10.6, and obsoleted in macOS 10.7. This information is used by Clang to determine when it is safe to use ``f``: for example, if -Clang is instructed to compile code for Mac OS X 10.5, a call to ``f()`` -succeeds. If Clang is instructed to compile code for Mac OS X 10.6, the call +Clang is instructed to compile code for macOS 10.5, a call to ``f()`` +succeeds. If Clang is instructed to compile code for macOS 10.6, the call succeeds but Clang emits a warning specifying that the function is deprecated. 
-Finally, if Clang is instructed to compile code for Mac OS X 10.7, the call +Finally, if Clang is instructed to compile code for macOS 10.7, the call fails because ``f()`` is no longer available. The availability attribute is a comma-separated list starting with the @@ -961,7 +961,7 @@ are: command-line arguments. ``macos`` - Apple's Mac OS X operating system. The minimum deployment target is + Apple's macOS operating system. The minimum deployment target is specified by the ``-mmacosx-version-min=*version*`` command-line argument. ``macosx`` is supported for backward-compatibility reasons, but it is deprecated. @@ -1015,6 +1015,19 @@ When one method overrides another, the overriding method can be more widely avai - (id)method __attribute__((availability(macos,introduced=10.3))); // okay: method moved into base class later - (id)method __attribute__((availability(macos,introduced=10.5))); // error: this method was available via the base class in 10.4 @end + +Starting with the macOS 10.12 SDK, the ``API_AVAILABLE`` macro from +```` can simplify the spelling: + +.. 
code-block:: objc + + @interface A + - (id)method API_AVAILABLE(macos(10.11))); + - (id)otherMethod API_AVAILABLE(macos(10.11), ios(11.0)); + @end + +Also see the documentation for `@available +`_ }]; } diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def b/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def index 75781dc7491d..1ddb9beaf913 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/Builtins.def @@ -1413,6 +1413,11 @@ BUILTIN(__builtin_os_log_format, "v*v*cC*.", "p:0:nt") // Builtins for XRay BUILTIN(__xray_customevent, "vcC*z", "") +// Win64-compatible va_list functions +BUILTIN(__builtin_ms_va_start, "vc*&.", "nt") +BUILTIN(__builtin_ms_va_end, "vc*&", "n") +BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsHexagon.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsHexagon.def index 85936cbfc08e..14fc4adc25bc 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsHexagon.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsHexagon.def @@ -882,6 +882,12 @@ BUILTIN(__builtin_HEXAGON_S2_ct0p,"iLLi","") BUILTIN(__builtin_HEXAGON_S2_ct1p,"iLLi","") BUILTIN(__builtin_HEXAGON_S2_interleave,"LLiLLi","") BUILTIN(__builtin_HEXAGON_S2_deinterleave,"LLiLLi","") +BUILTIN(__builtin_HEXAGON_Y2_dccleana,"vv*","") +BUILTIN(__builtin_HEXAGON_Y2_dccleaninva,"vv*","") +BUILTIN(__builtin_HEXAGON_Y2_dcinva,"vv*","") +BUILTIN(__builtin_HEXAGON_Y2_dczeroa,"vv*","") +BUILTIN(__builtin_HEXAGON_Y4_l2fetch,"vv*Ui","") +BUILTIN(__builtin_HEXAGON_Y5_l2fetch,"vv*LLUi","") BUILTIN(__builtin_HEXAGON_S6_rol_i_r,"iii","v:60:") BUILTIN(__builtin_HEXAGON_S6_rol_i_p,"LLiLLii","v:60:") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsSystemZ.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsSystemZ.def index 
fa96e10b3990..ac92286af0b5 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsSystemZ.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsSystemZ.def @@ -253,5 +253,29 @@ TARGET_BUILTIN(__builtin_s390_vfmsdb, "V2dV2dV2dV2d", "nc", "vector") TARGET_BUILTIN(__builtin_s390_vfsqdb, "V2dV2d", "nc", "vector") TARGET_BUILTIN(__builtin_s390_vftcidb, "V2SLLiV2dIii*", "nc", "vector") +// Vector-enhancements facility 1 intrinsics. +TARGET_BUILTIN(__builtin_s390_vlrl, "V16ScUivC*", "", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vstrl, "vV16ScUiv*", "", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vbperm, "V2ULLiV16UcV16Uc", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vmslg, "V16UcV2ULLiV2ULLiV16UcIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfmaxdb, "V2dV2dV2dIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfmindb, "V2dV2dV2dIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfnmadb, "V2dV2dV2dV2d", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfnmsdb, "V2dV2dV2dV2d", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfcesbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfchsbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfchesbs, "V4SiV4fV4fi*", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfisb, "V4fV4fIiIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfmaxsb, "V4fV4fV4fIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfminsb, "V4fV4fV4fIi", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vflnsb, "V4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vflpsb, "V4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfmasb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfmssb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1") 
+TARGET_BUILTIN(__builtin_s390_vfnmasb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfnmssb, "V4fV4fV4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vfsqsb, "V4fV4f", "nc", "vector-enhancements-1") +TARGET_BUILTIN(__builtin_s390_vftcisb, "V4SiV4fIii*", "nc", "vector-enhancements-1") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def index 4cd3f1d46473..a516bf6bf06c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def @@ -34,11 +34,6 @@ // can use it? BUILTIN(__builtin_cpu_supports, "bcC*", "nc") -// Win64-compatible va_list functions -BUILTIN(__builtin_ms_va_start, "vc*&.", "nt") -BUILTIN(__builtin_ms_va_end, "vc*&", "n") -BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") - // Undefined Values // TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td index 3a0564806b32..53d8f36ecd00 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -152,6 +152,8 @@ def GNUFoldingConstant : DiagGroup<"gnu-folding-constant">; def FormatExtraArgs : DiagGroup<"format-extra-args">; def FormatZeroLength : DiagGroup<"format-zero-length">; def CXX1zCompatMangling : DiagGroup<"c++1z-compat-mangling">; +// Name of this warning in GCC. +def NoexceptType : DiagGroup<"noexcept-type", [CXX1zCompatMangling]>; // Warnings for C++1y code which is not compatible with prior C++ standards. 
def CXXPre14Compat : DiagGroup<"c++98-c++11-compat">; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h index 479d1978c62d..cdd358542a0d 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticIDs.h @@ -18,6 +18,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/StringRef.h" +#include namespace clang { class DiagnosticsEngine; @@ -263,6 +264,13 @@ class DiagnosticIDs : public RefCountedBase { /// are not SFINAE errors. static SFINAEResponse getDiagnosticSFINAEResponse(unsigned DiagID); + /// \brief Get the string of all diagnostic flags. + /// + /// \returns A list of all diagnostics flags as they would be written in a + /// command line invocation including their `no-` variants. For example: + /// `{"-Wempty-body", "-Wno-empty-body", ...}` + static std::vector getDiagnosticFlags(); + /// \brief Get the set of all diagnostic IDs in the group with the given name. /// /// \param[out] Diags - On return, the diagnostics in the group. 
diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5a8750e4dab6..af14638e1d61 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -808,8 +808,10 @@ def warn_property_types_are_incompatible : Warning< "property type %0 is incompatible with type %1 inherited from %2">, InGroup>; def warn_protocol_property_mismatch : Warning< - "property of type %0 was selected for synthesis">, + "property %select{of type %1|with attribute '%1'|without attribute '%1'|with " + "getter %1|with setter %1}0 was selected for synthesis">, InGroup>; +def err_protocol_property_mismatch: Error; def err_undef_interface : Error<"cannot find interface declaration for %0">; def err_category_forward_interface : Error< "cannot define %select{category|class extension}0 for undefined class %1">; @@ -1088,7 +1090,9 @@ def err_category_property : Error< def note_property_declare : Note< "property declared here">; def note_protocol_property_declare : Note< - "it could also be property of type %0 declared here">; + "it could also be property " + "%select{of type %1|without attribute '%1'|with attribute '%1'|with getter " + "%1|with setter %1}0 declared here">; def note_property_synthesize : Note< "property synthesized here">; def err_synthesize_category_decl : Error< @@ -4575,8 +4579,11 @@ def warn_deprecated_fwdclass_message : Warning< "%0 may be deprecated because the receiver type is unknown">, InGroup; def warn_deprecated_def : Warning< - "Implementing deprecated %select{method|class|category}0">, - InGroup, DefaultIgnore; + "implementing deprecated %select{method|class|category}0">, + InGroup, DefaultIgnore; +def warn_unavailable_def : Warning< + "implementing unavailable method">, + InGroup, DefaultIgnore; def err_unavailable : Error<"%0 is unavailable">; def err_property_method_unavailable 
: Error<"property access is using %0 method which is unavailable">; @@ -8106,10 +8113,10 @@ def err_systemz_invalid_tabort_code : Error< "invalid transaction abort code">; def err_64_bit_builtin_32_bit_tgt : Error< "this builtin is only available on 64-bit targets">; +def err_builtin_x64_aarch64_only : Error< + "this builtin is only available on x86-64 and aarch64 targets">; def err_ppc_builtin_only_on_pwr7 : Error< "this builtin is only valid on POWER7 or later CPUs">; -def err_x86_builtin_64_only : Error< - "this builtin is only available on x86-64 targets">; def err_x86_builtin_invalid_rounding : Error< "invalid rounding argument">; def err_x86_builtin_invalid_scale : Error< @@ -8648,11 +8655,11 @@ def err_omp_unknown_reduction_identifier : Error< def err_omp_not_resolved_reduction_identifier : Error< "unable to resolve declare reduction construct for type %0">; def err_omp_reduction_ref_type_arg : Error< - "argument of OpenMP clause 'reduction' must reference the same object in all threads">; + "argument of OpenMP clause '%0' must reference the same object in all threads">; def err_omp_clause_not_arithmetic_type_arg : Error< - "arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of %select{scalar|arithmetic}0 type">; + "arguments of OpenMP clause '%0' for 'min' or 'max' must be of %select{scalar|arithmetic}1 type">; def err_omp_clause_floating_type_arg : Error< - "arguments of OpenMP clause 'reduction' with bitwise operators cannot be of floating type">; + "arguments of OpenMP clause '%0' with bitwise operators cannot be of floating type">; def err_omp_once_referenced : Error< "variable can appear only once in OpenMP '%0' clause">; def err_omp_once_referenced_in_target_update : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td index 0fc54848581c..420ccebbfaf0 100644 --- 
a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSerializationKinds.td @@ -147,18 +147,29 @@ def err_module_odr_violation_mismatch_decl_diff : Error< "%select{non-|}5mutable field %4|" "field %4 with %select{no|an}5 initalizer|" "field %4 with an initializer|" - "method %4|" - "method %4 is %select{not deleted|deleted}5|" - "method %4 is %select{|pure }5%select{not virtual|virtual}6|" - "method %4 is %select{not static|static}5|" - "method %4 is %select{not volatile|volatile}5|" - "method %4 is %select{not const|const}5|" - "method %4 is %select{not inline|inline}5|" - "method %4 that has %5 parameter%s5|" - "method %4 with %ordinal5 parameter of type %6%select{| decayed from %8}7|" - "method %4 with %ordinal5 parameter named %6|" - "method %4 with %ordinal5 parameter with%select{out|}6 a default argument|" - "method %4 with %ordinal5 parameter with a default argument|" + "%select{method %5|constructor|destructor}4|" + "%select{method %5|constructor|destructor}4 " + "is %select{not deleted|deleted}6|" + "%select{method %5|constructor|destructor}4 " + "is %select{|pure }6%select{not virtual|virtual}7|" + "%select{method %5|constructor|destructor}4 " + "is %select{not static|static}6|" + "%select{method %5|constructor|destructor}4 " + "is %select{not volatile|volatile}6|" + "%select{method %5|constructor|destructor}4 " + "is %select{not const|const}6|" + "%select{method %5|constructor|destructor}4 " + "is %select{not inline|inline}6|" + "%select{method %5|constructor|destructor}4 " + "that has %6 parameter%s6|" + "%select{method %5|constructor|destructor}4 " + "with %ordinal6 parameter of type %7%select{| decayed from %9}8|" + "%select{method %5|constructor|destructor}4 " + "with %ordinal6 parameter named %7|" + "%select{method %5|constructor|destructor}4 " + "with %ordinal6 parameter with%select{out|}7 a default argument|" + "%select{method %5|constructor|destructor}4 " + 
"with %ordinal6 parameter with a default argument|" "%select{typedef|type alias}4 name %5|" "%select{typedef|type alias}4 %5 with underlying type %6|" "data member with name %4|" @@ -183,18 +194,29 @@ def note_module_odr_violation_mismatch_decl_diff : Note<"but in '%0' found " "%select{non-|}3mutable field %2|" "field %2 with %select{no|an}3 initializer|" "field %2 with a different initializer|" - "method %2|" - "method %2 is %select{not deleted|deleted}3|" - "method %2 is %select{|pure }3%select{not virtual|virtual}4|" - "method %2 is %select{not static|static}3|" - "method %2 is %select{not volatile|volatile}3|" - "method %2 is %select{not const|const}3|" - "method %2 is %select{not inline|inline}3|" - "method %2 that has %3 parameter%s3|" - "method %2 with %ordinal3 parameter of type %4%select{| decayed from %6}5|" - "method %2 with %ordinal3 parameter named %4|" - "method %2 with %ordinal3 parameter with%select{out|}4 a default argument|" - "method %2 with %ordinal3 parameter with a different default argument|" + "%select{method %3|constructor|destructor}2|" + "%select{method %3|constructor|destructor}2 " + "is %select{not deleted|deleted}4|" + "%select{method %3|constructor|destructor}2 " + "is %select{|pure }4%select{not virtual|virtual}5|" + "%select{method %3|constructor|destructor}2 " + "is %select{not static|static}4|" + "%select{method %3|constructor|destructor}2 " + "is %select{not volatile|volatile}4|" + "%select{method %3|constructor|destructor}2 " + "is %select{not const|const}4|" + "%select{method %3|constructor|destructor}2 " + "is %select{not inline|inline}4|" + "%select{method %3|constructor|destructor}2 " + "that has %4 parameter%s4|" + "%select{method %3|constructor|destructor}2 " + "with %ordinal4 parameter of type %5%select{| decayed from %7}6|" + "%select{method %3|constructor|destructor}2 " + "with %ordinal4 parameter named %5|" + "%select{method %3|constructor|destructor}2 " + "with %ordinal4 parameter with%select{out|}5 a default 
argument|" + "%select{method %3|constructor|destructor}2 " + "with %ordinal4 parameter with a different default argument|" "%select{typedef|type alias}2 name %3|" "%select{typedef|type alias}2 %3 with different underlying type %4|" "data member with name %2|" diff --git a/contrib/llvm/tools/clang/include/clang/Basic/IdentifierTable.h b/contrib/llvm/tools/clang/include/clang/Basic/IdentifierTable.h index 9b1ba4a98e6f..f94b2c9b2f42 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/IdentifierTable.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/IdentifierTable.h @@ -272,10 +272,6 @@ class IdentifierInfo { /// this identifier is a C++ alternate representation of an operator. void setIsCPlusPlusOperatorKeyword(bool Val = true) { IsCPPOperatorKeyword = Val; - if (Val) - NeedsHandleIdentifier = true; - else - RecomputeNeedsHandleIdentifier(); } bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } @@ -381,10 +377,9 @@ class IdentifierInfo { /// This method is very tied to the definition of HandleIdentifier. Any /// change to it should be reflected here. 
void RecomputeNeedsHandleIdentifier() { - NeedsHandleIdentifier = - (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() | - isExtensionToken() | isFutureCompatKeyword() || isOutOfDate() || - isModulesImport()); + NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || + isExtensionToken() || isFutureCompatKeyword() || + isOutOfDate() || isModulesImport(); } }; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def b/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def index dfdad108922a..c9230e0aaa6f 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/LangOptions.def @@ -90,6 +90,7 @@ LANGOPT(CPlusPlus , 1, 0, "C++") LANGOPT(CPlusPlus11 , 1, 0, "C++11") LANGOPT(CPlusPlus14 , 1, 0, "C++14") LANGOPT(CPlusPlus1z , 1, 0, "C++1z") +LANGOPT(CPlusPlus2a , 1, 0, "C++2a") LANGOPT(ObjC1 , 1, 0, "Objective-C 1") LANGOPT(ObjC2 , 1, 0, "Objective-C 2") BENIGN_LANGOPT(ObjCDefaultSynthProperties , 1, 0, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def index aae1c3a9b8c5..645ed52b59ca 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def @@ -168,6 +168,9 @@ #ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE #define OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(Name) #endif +#ifndef OPENMP_TASKGROUP_CLAUSE +#define OPENMP_TASKGROUP_CLAUSE(Name) +#endif // OpenMP directives. OPENMP_DIRECTIVE(threadprivate) @@ -270,6 +273,7 @@ OPENMP_CLAUSE(to, OMPToClause) OPENMP_CLAUSE(from, OMPFromClause) OPENMP_CLAUSE(use_device_ptr, OMPUseDevicePtrClause) OPENMP_CLAUSE(is_device_ptr, OMPIsDevicePtrClause) +OPENMP_CLAUSE(task_reduction, OMPTaskReductionClause) // Clauses allowed for OpenMP directive 'parallel'. 
OPENMP_PARALLEL_CLAUSE(if) @@ -848,6 +852,10 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(aligned) OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(safelen) OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(simdlen) +// Clauses allowed for OpenMP directive 'taskgroup'. +OPENMP_TASKGROUP_CLAUSE(task_reduction) + +#undef OPENMP_TASKGROUP_CLAUSE #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h index 33952f83ff23..50fb936e01d1 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/Specifiers.h @@ -236,7 +236,7 @@ namespace clang { CC_X86ThisCall, // __attribute__((thiscall)) CC_X86VectorCall, // __attribute__((vectorcall)) CC_X86Pascal, // __attribute__((pascal)) - CC_X86_64Win64, // __attribute__((ms_abi)) + CC_Win64, // __attribute__((ms_abi)) CC_X86_64SysV, // __attribute__((sysv_abi)) CC_X86RegCall, // __attribute__((regcall)) CC_AAPCS, // __attribute__((pcs("aapcs"))) diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h index 5885532b91db..d1a9ea85dbe9 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h @@ -226,6 +226,20 @@ class TargetInfo : public RefCountedBase { public: IntType getSizeType() const { return SizeType; } + IntType getSignedSizeType() const { + switch (SizeType) { + case UnsignedShort: + return SignedShort; + case UnsignedInt: + return SignedInt; + case UnsignedLong: + return SignedLong; + case UnsignedLongLong: + return SignedLongLong; + default: + llvm_unreachable("Invalid SizeType"); + } + } IntType getIntMaxType() const { return IntMaxType; } IntType getUIntMaxType() const { return getCorrespondingUnsignedType(IntMaxType); diff --git 
a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index 861dfbf1916e..753c178eec6a 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -2119,6 +2119,7 @@ def nofixprebinding : Flag<["-"], "nofixprebinding">; def nolibc : Flag<["-"], "nolibc">; def nomultidefs : Flag<["-"], "nomultidefs">; def nopie : Flag<["-"], "nopie">; +def no_pie : Flag<["-"], "no-pie">, Alias; def noprebind : Flag<["-"], "noprebind">; def noseglinkedit : Flag<["-"], "noseglinkedit">; def nostartfiles : Flag<["-"], "nostartfiles">; diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandard.h b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandard.h index ec32aa8d161f..6731e08bcae8 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandard.h +++ b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandard.h @@ -26,11 +26,12 @@ enum LangFeatures { CPlusPlus11 = (1 << 4), CPlusPlus14 = (1 << 5), CPlusPlus1z = (1 << 6), - Digraphs = (1 << 7), - GNUMode = (1 << 8), - HexFloat = (1 << 9), - ImplicitInt = (1 << 10), - OpenCL = (1 << 11) + CPlusPlus2a = (1 << 7), + Digraphs = (1 << 8), + GNUMode = (1 << 9), + HexFloat = (1 << 10), + ImplicitInt = (1 << 11), + OpenCL = (1 << 12) }; } @@ -81,6 +82,10 @@ struct LangStandard { /// isCPlusPlus1z - Language is a C++17 variant (or later). bool isCPlusPlus1z() const { return Flags & frontend::CPlusPlus1z; } + /// isCPlusPlus2a - Language is a post-C++17 variant (or later). + bool isCPlusPlus2a() const { return Flags & frontend::CPlusPlus2a; } + + /// hasDigraphs - Language supports digraphs. 
bool hasDigraphs() const { return Flags & frontend::Digraphs; } diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def index 1d214fd2a2be..669e487023a5 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def +++ b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def @@ -119,6 +119,16 @@ LANGSTANDARD(gnucxx1z, "gnu++1z", LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z | Digraphs | HexFloat | GNUMode) +LANGSTANDARD(cxx2a, "c++2a", + CXX, "Working draft for ISO C++ 2020", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z | + CPlusPlus2a | Digraphs | HexFloat) + +LANGSTANDARD(gnucxx2a, "gnu++2a", + CXX, "Working draft for ISO C++ 2020 with GNU extensions", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus1z | + CPlusPlus2a | Digraphs | HexFloat | GNUMode) + // OpenCL LANGSTANDARD(opencl10, "cl1.0", OpenCL, "OpenCL 1.0", diff --git a/contrib/llvm/tools/clang/include/clang/Index/IndexingAction.h b/contrib/llvm/tools/clang/include/clang/Index/IndexingAction.h index 8eed33c61227..fb703be4e5f5 100644 --- a/contrib/llvm/tools/clang/include/clang/Index/IndexingAction.h +++ b/contrib/llvm/tools/clang/include/clang/Index/IndexingAction.h @@ -11,11 +11,14 @@ #define LLVM_CLANG_INDEX_INDEXINGACTION_H #include "clang/Basic/LLVM.h" +#include "llvm/ADT/ArrayRef.h" #include namespace clang { + class ASTContext; class ASTReader; class ASTUnit; + class Decl; class FrontendAction; namespace serialization { @@ -47,8 +50,11 @@ void indexASTUnit(ASTUnit &Unit, std::shared_ptr DataConsumer, IndexingOptions Opts); -void indexModuleFile(serialization::ModuleFile &Mod, - ASTReader &Reader, +void indexTopLevelDecls(ASTContext &Ctx, ArrayRef Decls, + std::shared_ptr DataConsumer, + IndexingOptions Opts); + +void indexModuleFile(serialization::ModuleFile &Mod, ASTReader &Reader, std::shared_ptr DataConsumer, 
IndexingOptions Opts); diff --git a/contrib/llvm/tools/clang/include/clang/Lex/MacroInfo.h b/contrib/llvm/tools/clang/include/clang/Lex/MacroInfo.h index 7da1e7b41ab8..d25431b55fdc 100644 --- a/contrib/llvm/tools/clang/include/clang/Lex/MacroInfo.h +++ b/contrib/llvm/tools/clang/include/clang/Lex/MacroInfo.h @@ -42,14 +42,14 @@ class MacroInfo { /// \brief The list of arguments for a function-like macro. /// - /// ArgumentList points to the first of NumArguments pointers. + /// ParameterList points to the first of NumParameters pointers. /// /// This can be empty, for, e.g. "#define X()". In a C99-style variadic /// macro, this includes the \c __VA_ARGS__ identifier on the list. - IdentifierInfo **ArgumentList; + IdentifierInfo **ParameterList; - /// \see ArgumentList - unsigned NumArguments; + /// \see ParameterList + unsigned NumParameters; /// \brief This is the list of tokens that the macro is defined to. SmallVector ReplacementTokens; @@ -153,37 +153,37 @@ class MacroInfo { /// \brief Set the value of the IsWarnIfUnused flag. void setIsWarnIfUnused(bool val) { IsWarnIfUnused = val; } - /// \brief Set the specified list of identifiers as the argument list for + /// \brief Set the specified list of identifiers as the parameter list for /// this macro. - void setArgumentList(ArrayRef List, + void setParameterList(ArrayRef List, llvm::BumpPtrAllocator &PPAllocator) { - assert(ArgumentList == nullptr && NumArguments == 0 && - "Argument list already set!"); + assert(ParameterList == nullptr && NumParameters == 0 && + "Parameter list already set!"); if (List.empty()) return; - NumArguments = List.size(); - ArgumentList = PPAllocator.Allocate(List.size()); - std::copy(List.begin(), List.end(), ArgumentList); + NumParameters = List.size(); + ParameterList = PPAllocator.Allocate(List.size()); + std::copy(List.begin(), List.end(), ParameterList); } - /// Arguments - The list of arguments for a function-like macro. This can be - /// empty, for, e.g. "#define X()". 
- typedef IdentifierInfo *const *arg_iterator; - bool arg_empty() const { return NumArguments == 0; } - arg_iterator arg_begin() const { return ArgumentList; } - arg_iterator arg_end() const { return ArgumentList + NumArguments; } - unsigned getNumArgs() const { return NumArguments; } - ArrayRef args() const { - return ArrayRef(ArgumentList, NumArguments); + /// Parameters - The list of parameters for a function-like macro. This can + /// be empty, for, e.g. "#define X()". + typedef IdentifierInfo *const *param_iterator; + bool param_empty() const { return NumParameters == 0; } + param_iterator param_begin() const { return ParameterList; } + param_iterator param_end() const { return ParameterList + NumParameters; } + unsigned getNumParams() const { return NumParameters; } + ArrayRef params() const { + return ArrayRef(ParameterList, NumParameters); } - /// \brief Return the argument number of the specified identifier, - /// or -1 if the identifier is not a formal argument identifier. - int getArgumentNum(const IdentifierInfo *Arg) const { - for (arg_iterator I = arg_begin(), E = arg_end(); I != E; ++I) + /// \brief Return the parameter number of the specified identifier, + /// or -1 if the identifier is not a formal parameter identifier. + int getParameterNum(const IdentifierInfo *Arg) const { + for (param_iterator I = param_begin(), E = param_end(); I != E; ++I) if (*I == Arg) - return I - arg_begin(); + return I - param_begin(); return -1; } diff --git a/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h b/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h index 62090d6496ed..a058fbfbb4cf 100644 --- a/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h +++ b/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h @@ -1813,11 +1813,24 @@ class Preprocessor { void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, bool *ShadowFlag = nullptr); + /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. 
the + /// entire line) of the macro's tokens and adds them to MacroInfo, and while + /// doing so performs certain validity checks including (but not limited to): + /// - # (stringization) is followed by a macro parameter + /// \param MacroNameTok - Token that represents the macro name + /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard + /// + /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and + /// returns a nullptr if an invalid sequence of tokens is encountered. + + MacroInfo *ReadOptionalMacroParameterListAndBody( + const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); + /// The ( starting an argument list of a macro definition has just been read. - /// Lex the rest of the arguments and the closing ), updating \p MI with + /// Lex the rest of the parameters and the closing ), updating \p MI with /// what we learn and saving in \p LastTok the last token read. /// Return true if an error occurs parsing the arg list. - bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok); + bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); /// We just read a \#if or related directive and decided that the /// subsequent tokens are in the \#if'd out portion of the @@ -1878,7 +1891,7 @@ class Preprocessor { /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. 
- MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI, + MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, SourceLocation &ExpansionEnd); /// \brief If an identifier token is read that is to be expanded diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index 95629a2591cf..5a708545705c 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -3881,13 +3881,10 @@ class Sema { void redelayDiagnostics(sema::DelayedDiagnosticPool &pool); - void EmitAvailabilityWarning(AvailabilityResult AR, - const NamedDecl *ReferringDecl, - const NamedDecl *OffendingDecl, - StringRef Message, SourceLocation Loc, - const ObjCInterfaceDecl *UnknownObjCClass, - const ObjCPropertyDecl *ObjCProperty, - bool ObjCPropertyAccess); + void DiagnoseAvailabilityOfDecl(NamedDecl *D, SourceLocation Loc, + const ObjCInterfaceDecl *UnknownObjCClass, + bool ObjCPropertyAccess, + bool AvoidPartialAvailabilityChecks = false); bool makeUnavailableInSystemHeader(SourceLocation loc, UnavailableAttr::ImplicitReason reason); @@ -8380,6 +8377,8 @@ class Sema { unsigned SpellingListIndex, bool isNSConsumed, bool isTemplateInstantiation); + bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); + //===--------------------------------------------------------------------===// // C++ Coroutines TS // @@ -8680,7 +8679,8 @@ class Sema { StmtResult ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc, SourceLocation EndLoc); /// \brief Called on well-formed '\#pragma omp taskgroup'. - StmtResult ActOnOpenMPTaskgroupDirective(Stmt *AStmt, SourceLocation StartLoc, + StmtResult ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); /// \brief Called on well-formed '\#pragma omp flush'. 
StmtResult ActOnOpenMPFlushDirective(ArrayRef Clauses, @@ -9022,6 +9022,13 @@ class Sema { CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, ArrayRef UnresolvedReductions = llvm::None); + /// Called on well-formed 'task_reduction' clause. + OMPClause *ActOnOpenMPTaskReductionClause( + ArrayRef VarList, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, + const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions = llvm::None); /// \brief Called on well-formed 'linear' clause. OMPClause * ActOnOpenMPLinearClause(ArrayRef VarList, Expr *Step, @@ -10416,15 +10423,6 @@ class Sema { return OriginalLexicalContext ? OriginalLexicalContext : CurContext; } - /// The diagnostic we should emit for \c D, and the declaration that - /// originated it, or \c AR_Available. - /// - /// \param D The declaration to check. - /// \param Message If non-null, this will be populated with the message from - /// the availability attribute that is selected. - std::pair - ShouldDiagnoseAvailabilityOfDecl(const NamedDecl *D, std::string *Message); - const DeclContext *getCurObjCLexicalContext() const { const DeclContext *DC = getCurLexicalContext(); // A category implicitly has the attribute of the interface. 
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h index f32b9fa9c94b..4d6ff063641b 100644 --- a/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h +++ b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h @@ -56,6 +56,9 @@ template <> struct MappingTraits { MappingNormalization Keys( Io, D); Io.mapRequired("DiagnosticName", Keys->DiagnosticName); + Io.mapRequired("Message", Keys->Message.Message); + Io.mapRequired("FileOffset", Keys->Message.FileOffset); + Io.mapRequired("FilePath", Keys->Message.FilePath); // FIXME: Export properly all the different fields. @@ -82,17 +85,7 @@ template <> struct MappingTraits { template <> struct MappingTraits { static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) { Io.mapRequired("MainSourceFile", Doc.MainSourceFile); - - std::vector Diagnostics; - for (auto &Diagnostic : Doc.Diagnostics) { - // FIXME: Export all diagnostics, not just the ones with fixes. - // Update MappingTraits::mapping. - if (Diagnostic.Fix.size() > 0) { - Diagnostics.push_back(Diagnostic); - } - } - Io.mapRequired("Diagnostics", Diagnostics); - Doc.Diagnostics = Diagnostics; + Io.mapRequired("Diagnostics", Doc.Diagnostics); } }; } // end namespace yaml diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h new file mode 100644 index 000000000000..8b01a61256f6 --- /dev/null +++ b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h @@ -0,0 +1,122 @@ +//===--- RecursiveSymbolVisitor.h - Clang refactoring library -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief A wrapper class around \c RecursiveASTVisitor that visits each +/// occurrences of a named symbol. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H +#define LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H + +#include "clang/AST/AST.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace tooling { + +/// Traverses the AST and visits the occurrence of each named symbol in the +/// given nodes. +template +class RecursiveSymbolVisitor + : public RecursiveASTVisitor> { + using BaseType = RecursiveASTVisitor>; + +public: + RecursiveSymbolVisitor(const SourceManager &SM, const LangOptions &LangOpts) + : SM(SM), LangOpts(LangOpts) {} + + bool visitSymbolOccurrence(const NamedDecl *ND, + ArrayRef NameRanges) { + return true; + } + + // Declaration visitors: + + bool VisitNamedDecl(const NamedDecl *D) { + return isa(D) ? true : visit(D, D->getLocation()); + } + + bool VisitCXXConstructorDecl(const CXXConstructorDecl *CD) { + for (const auto *Initializer : CD->inits()) { + // Ignore implicit initializers. 
+ if (!Initializer->isWritten()) + continue; + if (const FieldDecl *FD = Initializer->getMember()) { + if (!visit(FD, Initializer->getSourceLocation(), + Lexer::getLocForEndOfToken(Initializer->getSourceLocation(), + 0, SM, LangOpts))) + return false; + } + } + return true; + } + + // Expression visitors: + + bool VisitDeclRefExpr(const DeclRefExpr *Expr) { + return visit(Expr->getFoundDecl(), Expr->getLocation()); + } + + bool VisitMemberExpr(const MemberExpr *Expr) { + return visit(Expr->getFoundDecl().getDecl(), Expr->getMemberLoc()); + } + + // Other visitors: + + bool VisitTypeLoc(const TypeLoc Loc) { + const SourceLocation TypeBeginLoc = Loc.getBeginLoc(); + const SourceLocation TypeEndLoc = + Lexer::getLocForEndOfToken(TypeBeginLoc, 0, SM, LangOpts); + if (const auto *TemplateTypeParm = + dyn_cast(Loc.getType())) { + if (!visit(TemplateTypeParm->getDecl(), TypeBeginLoc, TypeEndLoc)) + return false; + } + if (const auto *TemplateSpecType = + dyn_cast(Loc.getType())) { + if (!visit(TemplateSpecType->getTemplateName().getAsTemplateDecl(), + TypeBeginLoc, TypeEndLoc)) + return false; + } + return visit(Loc.getType()->getAsCXXRecordDecl(), TypeBeginLoc, TypeEndLoc); + } + + bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { + // The base visitor will visit NNSL prefixes, so we should only look at + // the current NNS. 
+ if (NNS) { + const NamespaceDecl *ND = NNS.getNestedNameSpecifier()->getAsNamespace(); + if (!visit(ND, NNS.getLocalBeginLoc(), NNS.getLocalEndLoc())) + return false; + } + return BaseType::TraverseNestedNameSpecifierLoc(NNS); + } + +private: + const SourceManager &SM; + const LangOptions &LangOpts; + + bool visit(const NamedDecl *ND, SourceLocation BeginLoc, + SourceLocation EndLoc) { + return static_cast(this)->visitSymbolOccurrence( + ND, SourceRange(BeginLoc, EndLoc)); + } + bool visit(const NamedDecl *ND, SourceLocation Loc) { + return visit(ND, Loc, + Loc.getLocWithOffset(ND->getNameAsString().length() - 1)); + } +}; + +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_REFACTOR_RECURSIVE_SYMBOL_VISITOR_H diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h index 28d541af43c0..b74a5d7f70af 100644 --- a/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h +++ b/contrib/llvm/tools/clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h @@ -18,13 +18,9 @@ #include "clang/AST/AST.h" #include "clang/AST/ASTContext.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" #include #include -using namespace llvm; -using namespace clang::ast_matchers; - namespace clang { class ASTContext; @@ -48,36 +44,6 @@ const NamedDecl *getNamedDeclFor(const ASTContext &Context, // Converts a Decl into a USR. std::string getUSRForDecl(const Decl *Decl); -// FIXME: Implement RecursiveASTVisitor::VisitNestedNameSpecifier instead. 
-class NestedNameSpecifierLocFinder : public MatchFinder::MatchCallback {
-public:
-  explicit NestedNameSpecifierLocFinder(ASTContext &Context)
-      : Context(Context) {}
-
-  std::vector<NestedNameSpecifierLoc> getNestedNameSpecifierLocations() {
-    addMatchers();
-    Finder.matchAST(Context);
-    return Locations;
-  }
-
-private:
-  void addMatchers() {
-    const auto NestedNameSpecifierLocMatcher =
-        nestedNameSpecifierLoc().bind("nestedNameSpecifierLoc");
-    Finder.addMatcher(NestedNameSpecifierLocMatcher, this);
-  }
-
-  void run(const MatchFinder::MatchResult &Result) override {
-    const auto *NNS = Result.Nodes.getNodeAs<NestedNameSpecifierLoc>(
-        "nestedNameSpecifierLoc");
-    Locations.push_back(*NNS);
-  }
-
-  ASTContext &Context;
-  std::vector<NestedNameSpecifierLoc> Locations;
-  MatchFinder Finder;
-};
-
 } // end namespace tooling
 } // end namespace clang
 
diff --git a/contrib/llvm/tools/clang/include/clang/module.modulemap b/contrib/llvm/tools/clang/include/clang/module.modulemap
index f7e338d93399..d850bd552e1f 100644
--- a/contrib/llvm/tools/clang/include/clang/module.modulemap
+++ b/contrib/llvm/tools/clang/include/clang/module.modulemap
@@ -138,5 +138,4 @@ module Clang_Tooling {
   // importing the AST matchers library gives a link dependency on the AST
   // matchers (and thus the AST), which clang-format should not have.
   exclude header "Tooling/RefactoringCallbacks.h"
-  exclude header "Tooling/Refactoring/Rename/USRFinder.h"
 }
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
index fd9723298fca..c60373c5a90a 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
@@ -4525,6 +4525,12 @@ CanQualType ASTContext::getSizeType() const {
   return getFromTargetType(Target->getSizeType());
 }
 
+/// Return the unique signed counterpart of the integer type
+/// corresponding to size_t.
+CanQualType ASTContext::getSignedSizeType() const { + return getFromTargetType(Target->getSignedSizeType()); +} + /// getIntMaxType - Return the unique type for "intmax_t" (C99 7.18.1.5). CanQualType ASTContext::getIntMaxType() const { return getFromTargetType(Target->getIntMaxType()); diff --git a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp index 4758109fbcf7..92ed7da94d8e 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp @@ -1189,12 +1189,8 @@ void ASTDumper::VisitFunctionDecl(const FunctionDecl *D) { auto dumpOverride = [=](const CXXMethodDecl *D) { SplitQualType T_split = D->getType().split(); - OS << D << " " << D->getParent()->getName() << "::"; - if (isa(D)) - OS << "~" << D->getParent()->getName(); - else - OS << D->getName(); - OS << " '" << QualType::getAsString(T_split) << "'"; + OS << D << " " << D->getParent()->getName() << "::" + << D->getNameAsString() << " '" << QualType::getAsString(T_split) << "'"; }; dumpChild([=] { diff --git a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp index a0ec0c2b251e..d8bdb6369e94 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp @@ -1889,25 +1889,23 @@ void ObjCProtocolDecl::collectPropertiesToImplement(PropertyMap &PM, } } - void ObjCProtocolDecl::collectInheritedProtocolProperties( - const ObjCPropertyDecl *Property, - ProtocolPropertyMap &PM) const { + const ObjCPropertyDecl *Property, ProtocolPropertySet &PS, + PropertyDeclOrder &PO) const { if (const ObjCProtocolDecl *PDecl = getDefinition()) { - bool MatchFound = false; + if (!PS.insert(PDecl).second) + return; for (auto *Prop : PDecl->properties()) { if (Prop == Property) continue; if (Prop->getIdentifier() == Property->getIdentifier()) { - PM[PDecl] = Prop; - MatchFound = true; - break; + PO.push_back(Prop); + return; } } // Scan through 
protocol's protocols which did not have a matching property. - if (!MatchFound) - for (const auto *PI : PDecl->protocols()) - PI->collectInheritedProtocolProperties(Property, PM); + for (const auto *PI : PDecl->protocols()) + PI->collectInheritedProtocolProperties(Property, PS, PO); } } diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp index dc25e5213bae..4e7c6c4edf37 100644 --- a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp @@ -2529,7 +2529,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_X86ThisCall: case CC_X86VectorCall: case CC_X86Pascal: - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_X86RegCall: case CC_AAPCS: diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp index 3a899bdbb6d2..24b16f892e7a 100644 --- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -2122,7 +2122,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) { switch (CC) { default: llvm_unreachable("Unsupported CC for mangling"); - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_C: Out << 'A'; break; case CC_X86Pascal: Out << 'C'; break; diff --git a/contrib/llvm/tools/clang/lib/AST/ODRHash.cpp b/contrib/llvm/tools/clang/lib/AST/ODRHash.cpp index 66b9940b8b08..b19135384cfd 100644 --- a/contrib/llvm/tools/clang/lib/AST/ODRHash.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ODRHash.cpp @@ -246,7 +246,9 @@ class ODRDeclVisitor : public ConstDeclVisitor { } void VisitValueDecl(const ValueDecl *D) { - AddQualType(D->getType()); + if (!isa(D)) { + AddQualType(D->getType()); + } Inherited::VisitValueDecl(D); } @@ -305,6 +307,8 @@ class ODRDeclVisitor : public ConstDeclVisitor { Hash.AddSubDecl(Param); } + AddQualType(D->getReturnType()); + 
Inherited::VisitFunctionDecl(D); } @@ -350,6 +354,8 @@ bool ODRHash::isWhitelistedDecl(const Decl *D, const CXXRecordDecl *Parent) { default: return false; case Decl::AccessSpec: + case Decl::CXXConstructor: + case Decl::CXXDestructor: case Decl::CXXMethod: case Decl::Field: case Decl::Friend: diff --git a/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp b/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp index 77470a9b76d0..2c4d159a1bc8 100644 --- a/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp +++ b/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp @@ -46,6 +46,8 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { return static_cast(C); case OMPC_reduction: return static_cast(C); + case OMPC_task_reduction: + return static_cast(C); case OMPC_linear: return static_cast(C); case OMPC_if: @@ -112,6 +114,8 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) return static_cast(C); case OMPC_reduction: return static_cast(C); + case OMPC_task_reduction: + return static_cast(C); case OMPC_linear: return static_cast(C); case OMPC_schedule: @@ -505,6 +509,59 @@ OMPReductionClause *OMPReductionClause::CreateEmpty(const ASTContext &C, return new (Mem) OMPReductionClause(N); } +void OMPTaskReductionClause::setPrivates(ArrayRef Privates) { + assert(Privates.size() == varlist_size() && + "Number of private copies is not the same as the preallocated buffer"); + std::copy(Privates.begin(), Privates.end(), varlist_end()); +} + +void OMPTaskReductionClause::setLHSExprs(ArrayRef LHSExprs) { + assert( + LHSExprs.size() == varlist_size() && + "Number of LHS expressions is not the same as the preallocated buffer"); + std::copy(LHSExprs.begin(), LHSExprs.end(), getPrivates().end()); +} + +void OMPTaskReductionClause::setRHSExprs(ArrayRef RHSExprs) { + assert( + RHSExprs.size() == varlist_size() && + "Number of RHS expressions is not the same as the preallocated buffer"); + std::copy(RHSExprs.begin(), RHSExprs.end(), 
getLHSExprs().end()); +} + +void OMPTaskReductionClause::setReductionOps(ArrayRef ReductionOps) { + assert(ReductionOps.size() == varlist_size() && "Number of task reduction " + "expressions is not the same " + "as the preallocated buffer"); + std::copy(ReductionOps.begin(), ReductionOps.end(), getRHSExprs().end()); +} + +OMPTaskReductionClause *OMPTaskReductionClause::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef VL, + NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo, + ArrayRef Privates, ArrayRef LHSExprs, + ArrayRef RHSExprs, ArrayRef ReductionOps, Stmt *PreInit, + Expr *PostUpdate) { + void *Mem = C.Allocate(totalSizeToAlloc(5 * VL.size())); + OMPTaskReductionClause *Clause = new (Mem) OMPTaskReductionClause( + StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo); + Clause->setVarRefs(VL); + Clause->setPrivates(Privates); + Clause->setLHSExprs(LHSExprs); + Clause->setRHSExprs(RHSExprs); + Clause->setReductionOps(ReductionOps); + Clause->setPreInitStmt(PreInit); + Clause->setPostUpdateExpr(PostUpdate); + return Clause; +} + +OMPTaskReductionClause *OMPTaskReductionClause::CreateEmpty(const ASTContext &C, + unsigned N) { + void *Mem = C.Allocate(totalSizeToAlloc(5 * N)); + return new (Mem) OMPTaskReductionClause(N); +} + OMPFlushClause *OMPFlushClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, diff --git a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp index cccb2f075b65..1dcb4fd5196b 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp @@ -522,23 +522,28 @@ OMPTaskwaitDirective *OMPTaskwaitDirective::CreateEmpty(const ASTContext &C, return new (Mem) OMPTaskwaitDirective(); } -OMPTaskgroupDirective *OMPTaskgroupDirective::Create(const ASTContext &C, - SourceLocation StartLoc, - 
SourceLocation EndLoc, - Stmt *AssociatedStmt) { - unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective), alignof(Stmt *)); +OMPTaskgroupDirective *OMPTaskgroupDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *AssociatedStmt) { + unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective) + + sizeof(OMPClause *) * Clauses.size(), + alignof(Stmt *)); void *Mem = C.Allocate(Size + sizeof(Stmt *)); OMPTaskgroupDirective *Dir = - new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc); + new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc, Clauses.size()); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setClauses(Clauses); return Dir; } OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell) { - unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective), alignof(Stmt *)); + unsigned Size = llvm::alignTo(sizeof(OMPTaskgroupDirective) + + sizeof(OMPClause *) * NumClauses, + alignof(Stmt *)); void *Mem = C.Allocate(Size + sizeof(Stmt *)); - return new (Mem) OMPTaskgroupDirective(); + return new (Mem) OMPTaskgroupDirective(NumClauses); } OMPCancellationPointDirective *OMPCancellationPointDirective::Create( diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp index 21f5259c3ca8..5ebaa32b49c8 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp @@ -836,6 +836,29 @@ void OMPClausePrinter::VisitOMPReductionClause(OMPReductionClause *Node) { } } +void OMPClausePrinter::VisitOMPTaskReductionClause( + OMPTaskReductionClause *Node) { + if (!Node->varlist_empty()) { + OS << "task_reduction("; + NestedNameSpecifier *QualifierLoc = + Node->getQualifierLoc().getNestedNameSpecifier(); + OverloadedOperatorKind OOK = + Node->getNameInfo().getName().getCXXOverloadedOperator(); + if (QualifierLoc == nullptr && OOK != OO_None) { + // Print reduction 
identifier in C format + OS << getOperatorSpelling(OOK); + } else { + // Use C++ format + if (QualifierLoc != nullptr) + QualifierLoc->print(OS, Policy); + OS << Node->getNameInfo(); + } + OS << ":"; + VisitOMPClauseList(Node, ' '); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPLinearClause(OMPLinearClause *Node) { if (!Node->varlist_empty()) { OS << "linear"; @@ -1081,7 +1104,7 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) { } void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) { - Indent() << "#pragma omp taskgroup"; + Indent() << "#pragma omp taskgroup "; PrintOMPExecutableDirective(Node); } diff --git a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp index f1fbe2806b5d..e06386018d68 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp @@ -549,6 +549,30 @@ void OMPClauseProfiler::VisitOMPReductionClause( Profiler->VisitStmt(E); } } +void OMPClauseProfiler::VisitOMPTaskReductionClause( + const OMPTaskReductionClause *C) { + Profiler->VisitNestedNameSpecifier( + C->getQualifierLoc().getNestedNameSpecifier()); + Profiler->VisitName(C->getNameInfo().getName()); + VisitOMPClauseList(C); + VistOMPClauseWithPostUpdate(C); + for (auto *E : C->privates()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->lhs_exprs()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->rhs_exprs()) { + if (E) + Profiler->VisitStmt(E); + } + for (auto *E : C->reduction_ops()) { + if (E) + Profiler->VisitStmt(E); + } +} void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) { VisitOMPClauseList(C); VistOMPClauseWithPostUpdate(C); diff --git a/contrib/llvm/tools/clang/lib/AST/Type.cpp b/contrib/llvm/tools/clang/lib/AST/Type.cpp index a62ca5f9b4d7..eac02c0102bc 100644 --- a/contrib/llvm/tools/clang/lib/AST/Type.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Type.cpp @@ -2630,7 +2630,7 @@ 
StringRef FunctionType::getNameForCallConv(CallingConv CC) { case CC_X86ThisCall: return "thiscall"; case CC_X86Pascal: return "pascal"; case CC_X86VectorCall: return "vectorcall"; - case CC_X86_64Win64: return "ms_abi"; + case CC_Win64: return "ms_abi"; case CC_X86_64SysV: return "sysv_abi"; case CC_X86RegCall : return "regcall"; case CC_AAPCS: return "aapcs"; @@ -3023,6 +3023,7 @@ bool AttributedType::isQualifier() const { case AttributedType::attr_sptr: case AttributedType::attr_uptr: case AttributedType::attr_objc_kindof: + case AttributedType::attr_ns_returns_retained: return false; } llvm_unreachable("bad attributed type kind"); @@ -3056,6 +3057,7 @@ bool AttributedType::isCallingConv() const { case attr_objc_inert_unsafe_unretained: case attr_noreturn: case attr_nonnull: + case attr_ns_returns_retained: case attr_nullable: case attr_null_unspecified: case attr_objc_kindof: diff --git a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp index 0551340c37a1..15c63bf4ed98 100644 --- a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp @@ -104,6 +104,7 @@ namespace { void printAfter(QualType T, raw_ostream &OS); void AppendScope(DeclContext *DC, raw_ostream &OS); void printTag(TagDecl *T, raw_ostream &OS); + void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS); #define ABSTRACT_TYPE(CLASS, PARENT) #define TYPE(CLASS, PARENT) \ void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ @@ -685,6 +686,36 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T, FunctionType::ExtInfo Info = T->getExtInfo(); + printFunctionAfter(Info, OS); + + if (unsigned quals = T->getTypeQuals()) { + OS << ' '; + AppendTypeQualList(OS, quals, Policy.Restrict); + } + + switch (T->getRefQualifier()) { + case RQ_None: + break; + + case RQ_LValue: + OS << " &"; + break; + + case RQ_RValue: + OS << " &&"; + break; + } + 
T->printExceptionSpecification(OS, Policy); + + if (T->hasTrailingReturn()) { + OS << " -> "; + print(T->getReturnType(), OS, StringRef()); + } else + printAfter(T->getReturnType(), OS); +} + +void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, + raw_ostream &OS) { if (!InsideCCAttribute) { switch (Info.getCC()) { case CC_C: @@ -720,7 +751,7 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T, case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; break; - case CC_X86_64Win64: + case CC_Win64: OS << " __attribute__((ms_abi))"; break; case CC_X86_64SysV: @@ -747,36 +778,13 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T, if (Info.getNoReturn()) OS << " __attribute__((noreturn))"; + if (Info.getProducesResult()) + OS << " __attribute__((ns_returns_retained))"; if (Info.getRegParm()) OS << " __attribute__((regparm (" << Info.getRegParm() << ")))"; if (Info.getNoCallerSavedRegs()) - OS << "__attribute__((no_caller_saved_registers))"; - - if (unsigned quals = T->getTypeQuals()) { - OS << ' '; - AppendTypeQualList(OS, quals, Policy.Restrict); - } - - switch (T->getRefQualifier()) { - case RQ_None: - break; - - case RQ_LValue: - OS << " &"; - break; - - case RQ_RValue: - OS << " &&"; - break; - } - T->printExceptionSpecification(OS, Policy); - - if (T->hasTrailingReturn()) { - OS << " -> "; - print(T->getReturnType(), OS, StringRef()); - } else - printAfter(T->getReturnType(), OS); + OS << " __attribute__((no_caller_saved_registers))"; } void TypePrinter::printFunctionNoProtoBefore(const FunctionNoProtoType *T, @@ -795,8 +803,7 @@ void TypePrinter::printFunctionNoProtoAfter(const FunctionNoProtoType *T, SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); OS << "()"; - if (T->getNoReturnAttr()) - OS << " __attribute__((noreturn))"; + printFunctionAfter(T->getExtInfo(), OS); printAfter(T->getReturnType(), OS); } @@ -1270,6 +1277,12 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, 
if (T->getAttrKind() == AttributedType::attr_objc_inert_unsafe_unretained) return; + // Don't print ns_returns_retained unless it had an effect. + if (T->getAttrKind() == AttributedType::attr_ns_returns_retained && + !T->getEquivalentType()->castAs() + ->getExtInfo().getProducesResult()) + return; + // Print nullability type specifiers that occur after if (T->getAttrKind() == AttributedType::attr_nonnull || T->getAttrKind() == AttributedType::attr_nullable || @@ -1361,6 +1374,10 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, OS << ')'; break; + case AttributedType::attr_ns_returns_retained: + OS << "ns_returns_retained"; + break; + // FIXME: When Sema learns to form this AttributedType, avoid printing the // attribute again in printFunctionProtoAfter. case AttributedType::attr_noreturn: OS << "noreturn"; break; diff --git a/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp index 60556697113a..50a3aa20bd19 100644 --- a/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp +++ b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp @@ -466,8 +466,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, case LengthModifier::AsIntMax: return ArgType(Ctx.getIntMaxType(), "intmax_t"); case LengthModifier::AsSizeT: - // FIXME: How to get the corresponding signed version of size_t? - return ArgType(); + return ArgType(Ctx.getSignedSizeType(), "ssize_t"); case LengthModifier::AsInt3264: return Ctx.getTargetInfo().getTriple().isArch64Bit() ? 
ArgType(Ctx.LongLongTy, "__int64") @@ -537,7 +536,7 @@ ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, case LengthModifier::AsIntMax: return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); case LengthModifier::AsSizeT: - return ArgType(); // FIXME: ssize_t + return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); case LengthModifier::AsPtrDiff: return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); case LengthModifier::AsLongDouble: diff --git a/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp b/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp index ce493c1e5cab..932b3f1934cc 100644 --- a/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp @@ -510,6 +510,18 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) { return StringRef(); } +std::vector DiagnosticIDs::getDiagnosticFlags() { + std::vector Res; + for (size_t I = 1; DiagGroupNames[I] != '\0';) { + std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]); + I += DiagGroupNames[I] + 1; + Res.push_back("-W" + Diag); + Res.push_back("-Wno" + Diag); + } + + return Res; +} + /// Return \c true if any diagnostics were found in this group, even if they /// were filtered out due to having the wrong flavor. 
static bool getDiagnosticsInGroup(diag::Flavor Flavor, diff --git a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp index 76a0e18c2d73..050c0cc466db 100644 --- a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp @@ -138,6 +138,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: @@ -277,6 +278,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: @@ -705,6 +707,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, #define OPENMP_TARGET_TEAMS_DISTRIBUTE_SIMD_CLAUSE(Name) \ case OMPC_##Name: \ return true; +#include "clang/Basic/OpenMPKinds.def" + default: + break; + } + break; + case OMPD_taskgroup: + switch (CKind) { +#define OPENMP_TASKGROUP_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; #include "clang/Basic/OpenMPKinds.def" default: break; @@ -719,7 +731,6 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: - case OMPD_taskgroup: case OMPD_cancellation_point: case OMPD_declare_reduction: break; @@ -840,7 +851,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) { return Kind == OMPC_private || Kind == OMPC_firstprivate || Kind == OMPC_lastprivate || Kind == OMPC_linear || - Kind == OMPC_reduction; // TODO add next clauses like 'reduction'. + Kind == OMPC_reduction || + Kind == OMPC_task_reduction; // TODO add next clauses like 'reduction'. 
} bool clang::isOpenMPThreadPrivate(OpenMPClauseKind Kind) { diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp index 50b4fc34ad3a..5d75aa5a7528 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp @@ -571,8 +571,6 @@ class OpenBSDTargetInfo : public OSTargetInfo { public: OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { - this->TLSSupported = false; - switch (Triple.getArch()) { case llvm::Triple::x86: case llvm::Triple::x86_64: @@ -3401,6 +3399,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "xsaveopt", true); + setFeatureEnabledImpl(Features, "movbe", true); LLVM_FALLTHROUGH; case CK_BTVER1: setFeatureEnabledImpl(Features, "ssse3", true); @@ -4900,7 +4899,7 @@ class X86_64TargetInfo : public X86TargetInfo { case CC_Swift: case CC_X86VectorCall: case CC_IntelOclBicc: - case CC_X86_64Win64: + case CC_Win64: case CC_PreserveMost: case CC_PreserveAll: case CC_X86RegCall: @@ -6251,7 +6250,8 @@ class AArch64TargetInfo : public TargetInfo { enum FPUModeEnum { FPUMode, - NeonMode + NeonMode = (1 << 0), + SveMode = (1 << 1) }; unsigned FPU; @@ -6290,6 +6290,9 @@ class AArch64TargetInfo : public TargetInfo { LongDoubleWidth = LongDoubleAlign = SuitableAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); + // Make __builtin_ms_va_list available. + HasBuiltinMSVaList = true; + // {} in inline assembly are neon specifiers, not assembly variant // specifiers. NoAsmVariants = true; @@ -6385,12 +6388,15 @@ class AArch64TargetInfo : public TargetInfo { Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); - if (FPU == NeonMode) { + if (FPU & NeonMode) { Builder.defineMacro("__ARM_NEON", "1"); // 64-bit NEON supports half, single and double precision operations. 
Builder.defineMacro("__ARM_NEON_FP", "0xE"); } + if (FPU & SveMode) + Builder.defineMacro("__ARM_FEATURE_SVE", "1"); + if (CRC) Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); @@ -6426,7 +6432,8 @@ class AArch64TargetInfo : public TargetInfo { return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" || - (Feature == "neon" && FPU == NeonMode); + (Feature == "neon" && (FPU & NeonMode)) || + (Feature == "sve" && (FPU & SveMode)); } bool handleTargetFeatures(std::vector &Features, @@ -6440,7 +6447,9 @@ class AArch64TargetInfo : public TargetInfo { for (const auto &Feature : Features) { if (Feature == "+neon") - FPU = NeonMode; + FPU |= NeonMode; + if (Feature == "+sve") + FPU |= SveMode; if (Feature == "+crc") CRC = 1; if (Feature == "+crypto") @@ -6467,6 +6476,7 @@ class AArch64TargetInfo : public TargetInfo { case CC_PreserveMost: case CC_PreserveAll: case CC_OpenCLKernel: + case CC_Win64: return CCCR_OK; default: return CCCR_Warning; @@ -6644,13 +6654,26 @@ class MicrosoftARM64TargetInfo MicrosoftARM64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : WindowsTargetInfo(Triple, Opts), Triple(Triple) { + + // This is an LLP64 platform. + // int:4, long:4, long long:8, long double:8. 
WCharType = UnsignedShort; + IntWidth = IntAlign = 32; + LongWidth = LongAlign = 32; + DoubleAlign = LongLongAlign = 64; + LongDoubleWidth = LongDoubleAlign = 64; + LongDoubleFormat = &llvm::APFloat::IEEEdouble(); + IntMaxType = SignedLongLong; + Int64Type = SignedLongLong; SizeType = UnsignedLongLong; + PtrDiffType = SignedLongLong; + IntPtrType = SignedLongLong; + TheCXXABI.set(TargetCXXABI::Microsoft); } void setDataLayout() override { - resetDataLayout("e-m:w-i64:64-i128:128-n32:64-S128"); + resetDataLayout("e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"); } void getVisualStudioDefines(const LangOptions &Opts, @@ -7470,7 +7493,7 @@ class SystemZTargetInfo : public TargetInfo { if (HasVector) Builder.defineMacro("__VX__"); if (Opts.ZVector) - Builder.defineMacro("__VEC__", "10301"); + Builder.defineMacro("__VEC__", "10302"); } ArrayRef getTargetBuiltins() const override { return llvm::makeArrayRef(BuiltinInfo, @@ -7497,6 +7520,7 @@ class SystemZTargetInfo : public TargetInfo { .Cases("arch9", "z196", 9) .Cases("arch10", "zEC12", 10) .Cases("arch11", "z13", 11) + .Cases("arch12", "z14", 12) .Default(-1); } bool setCPU(const std::string &Name) override { @@ -7513,6 +7537,8 @@ class SystemZTargetInfo : public TargetInfo { Features["transactional-execution"] = true; if (ISARevision >= 11) Features["vector"] = true; + if (ISARevision >= 12) + Features["vector-enhancements-1"] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } @@ -7542,6 +7568,7 @@ class SystemZTargetInfo : public TargetInfo { .Case("arch9", ISARevision >= 9) .Case("arch10", ISARevision >= 10) .Case("arch11", ISARevision >= 11) + .Case("arch12", ISARevision >= 12) .Case("htm", HasTransactionalExecution) .Case("vx", HasVector) .Default(false); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index bc902507c46e..f3527b0f39d1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ 
b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -2795,6 +2795,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); } + + case Builtin::BI__builtin_ms_va_start: + case Builtin::BI__builtin_ms_va_end: + return RValue::get( + EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + BuiltinID == Builtin::BI__builtin_ms_va_start)); + + case Builtin::BI__builtin_ms_va_copy: { + // Lower this manually. We can't reliably determine whether or not any + // given va_copy() is for a Win64 va_list from the calling convention + // alone, because it's legal to do this from a System V ABI function. + // With opaque pointer types, we won't have enough information in LLVM + // IR to determine this from the argument types, either. Best to do it + // now, while we have enough information. + Address DestAddr = EmitMSVAListRef(E->getArg(0)); + Address SrcAddr = EmitMSVAListRef(E->getArg(1)); + + llvm::Type *BPP = Int8PtrPtrTy; + + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), + DestAddr.getAlignment()); + SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), + SrcAddr.getAlignment()); + + Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); + return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -7223,31 +7250,6 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_ms_va_start || - BuiltinID == X86::BI__builtin_ms_va_end) - return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), - BuiltinID == X86::BI__builtin_ms_va_start); - if (BuiltinID == X86::BI__builtin_ms_va_copy) { - // Lower this manually. 
We can't reliably determine whether or not any - // given va_copy() is for a Win64 va_list from the calling convention - // alone, because it's legal to do this from a System V ABI function. - // With opaque pointer types, we won't have enough information in LLVM - // IR to determine this from the argument types, either. Best to do it - // now, while we have enough information. - Address DestAddr = EmitMSVAListRef(E->getArg(0)); - Address SrcAddr = EmitMSVAListRef(E->getArg(1)); - - llvm::Type *BPP = Int8PtrPtrTy; - - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), - DestAddr.getAlignment()); - SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), - SrcAddr.getAlignment()); - - Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); - return Builder.CreateStore(ArgPtr, DestAddr); - } - SmallVector Ops; // Find out if any arguments are required to be integer constant expressions. @@ -8790,12 +8792,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Undef}); } + case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8804,6 +8808,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Z}); } + case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8813,12 +8818,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = 
CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); } + case SystemZ::BI__builtin_s390_vfnmasb: + case SystemZ::BI__builtin_s390_vfnmadb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); + } + case SystemZ::BI__builtin_s390_vfnmssb: + case SystemZ::BI__builtin_s390_vfnmsdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); + } + case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vflnsb: case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8826,6 +8854,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); } + case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8835,8 +8864,8 @@ Value 
*CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); (void)IsConstM4; (void)IsConstM5; - // Check whether this instance of vfidb can be represented via a LLVM - // standard intrinsic. We only support some combinations of M4 and M5. + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (M4.getZExtValue()) { default: break; @@ -8861,11 +8890,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } - Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; + case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); return Builder.CreateCall(F, {X, M4Value, M5Value}); } + case SystemZ::BI__builtin_s390_vfmaxsb: + case SystemZ::BI__builtin_s390_vfmaxdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. 
+ Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::maxnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; + case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } + case SystemZ::BI__builtin_s390_vfminsb: + case SystemZ::BI__builtin_s390_vfmindb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::minnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; + case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } // Vector intrisincs that output the post-instruction CC value. 
@@ -8932,10 +9026,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vstrczhs); INTRINSIC_WITH_CC(s390_vstrczfs); + INTRINSIC_WITH_CC(s390_vfcesbs); INTRINSIC_WITH_CC(s390_vfcedbs); + INTRINSIC_WITH_CC(s390_vfchsbs); INTRINSIC_WITH_CC(s390_vfchdbs); + INTRINSIC_WITH_CC(s390_vfchesbs); INTRINSIC_WITH_CC(s390_vfchedbs); + INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); #undef INTRINSIC_WITH_CC diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp index cee656a62fe7..316bf44cb1c3 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp @@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; - case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; + case CC_Win64: return llvm::CallingConv::Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS; case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; @@ -218,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { return CC_IntelOclBicc; if (D->hasAttr()) - return IsWindows ? CC_C : CC_X86_64Win64; + return IsWindows ? CC_C : CC_Win64; if (D->hasAttr()) return IsWindows ? CC_X86_64SysV : CC_C; @@ -1877,8 +1877,8 @@ void CodeGenModule::ConstructAttributeList( // the function. 
const auto *TD = FD->getAttr(); TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture; if (TargetCPU != "") FuncAttrs.addAttribute("target-cpu", TargetCPU); if (!Features.empty()) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index b00d296fe34a..c9c450c32e3b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -956,7 +956,7 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_BORLAND_pascal; // FIXME: Create new DW_CC_ codes for these calling conventions. - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_AAPCS: case CC_AAPCS_VFP: @@ -3970,10 +3970,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || CGM.getCodeGenOpts().DebugExplicitImport) { + auto Loc = UD.getLocation(); DBuilder.createImportedModule( getCurrentContextDescriptor(cast(UD.getDeclContext())), - getOrCreateNamespace(NSDecl), - getLineNumber(UD.getLocation())); + getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc)); } } @@ -3996,10 +3996,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = - getDeclarationOrDefinition(USD.getUnderlyingDecl())) + getDeclarationOrDefinition(USD.getUnderlyingDecl())) { + auto Loc = USD.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast(USD.getDeclContext())), Target, - getLineNumber(USD.getLocation())); + getOrCreateFile(Loc), getLineNumber(Loc)); + } } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { @@ -4007,10 +4009,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { return; if 
(Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Loc = ID.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast(ID.getDeclContext())), - getOrCreateModuleRef(Info, DebugTypeExtRefs), - getLineNumber(ID.getLocation())); + getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc), + getLineNumber(Loc)); } } @@ -4022,18 +4025,19 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { if (VH) return cast(VH); llvm::DIImportedEntity *R; + auto Loc = NA.getLocation(); if (const auto *Underlying = dyn_cast(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast(NA.getDeclContext())), - EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()), - NA.getName()); + EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc), + getLineNumber(Loc), NA.getName()); else R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast(NA.getDeclContext())), getOrCreateNamespace(cast(NA.getAliasedNamespace())), - getLineNumber(NA.getLocation()), NA.getName()); + getOrCreateFile(Loc), getLineNumber(Loc), NA.getName()); VH.reset(R); return R; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index 9f40ee5a00a3..9572bd3543bd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -73,9 +73,12 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // cast alloca to the default address space when necessary. 
if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + auto CurIP = Builder.saveIP(); + Builder.SetInsertPoint(AllocaInsertPt); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + Builder.restoreIP(CurIP); } return Address(V, Align); @@ -3052,7 +3055,9 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, + CodeGenFunction::NotSubtraction, loc, + name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 43c86495f3d3..1170b014ec7f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -1851,7 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *input; int amount = (isInc ? 1 : -1); - bool signedIndex = !isInc; + bool isSubtraction = !isInc; if (const AtomicType *atomicTy = type->getAs()) { type = atomicTy->getValueType(); @@ -1941,8 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex, - E->getExprLoc(), "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP( + value, numElts, /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. 
} else if (type->isFunctionType()) { @@ -1952,8 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. @@ -1962,8 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. 
@@ -2044,7 +2047,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex, + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, + /*SignedIndices=*/false, isSubtraction, E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -2663,7 +2667,6 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); - bool mayHaveNegativeGEPIndex = isSigned || isSubtraction; unsigned width = cast(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); @@ -2715,7 +2718,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); pointer = - CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } return pointer; @@ -2733,7 +2736,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } @@ -2807,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isPointerTy() || op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ false); + return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction); if (op.Ty->isSignedIntegerOrEnumerationType()) { switch (CGF.getLangOpts().getSignedOverflowBehavior()) { @@ -2878,7 +2881,7 @@ Value 
*ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // If the RHS is not a pointer, then we have normal pointer // arithmetic. if (!op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ true); + return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction); // Otherwise, this is a pointer subtraction. @@ -3853,6 +3856,7 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name) { Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); @@ -3958,15 +3962,19 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, // pointer matches the sign of the total offset. llvm::Value *ValidGEP; auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); if (SignedIndices) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd( Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), NoOffsetOverflow); - } else { + } else if (!SignedIndices && !IsSubtraction) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } else { + auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a2ea0dec3e9d..d488bd4b30bf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -643,6 +643,12 @@ enum 
OpenMPRTLFunction { // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); OMPRTL__kmpc_doacross_wait, + // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + OMPRTL__kmpc_task_reduction_init, + // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + OMPRTL__kmpc_task_reduction_get_th_data, // // Offloading related calls @@ -697,6 +703,414 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { } } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. +static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast(ReductionOp)) + if (auto *OVE = dyn_cast(CE->getCallee())) + if (auto *DRE = + dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast(InitOp); + auto *OVE = cast(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast(cast(LHS)->getSubExpr()); + auto *RHSDRE = cast(cast(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = 
CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, const Expr *Init, + const OMPDeclareReductionDecl *DRD, + Address SrcAddr = Address::invalid()) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. 
+ auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + if (DRD && (DRD->getInitializer() || !Init)) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. 
+ auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitOMPArraySectionExpr(OASE); + if (const auto *ASE = dyn_cast(E)) + return CGF.EmitLValue(ASE); + auto *OrigVD = cast(cast(E)->getDecl()); + DeclRefExpr DRE(const_cast(OrigVD), + CGF.CapturedStmtInfo && + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + E->getType(), VK_LValue, E->getExprLoc()); + // Store the address of the original variable associated with the LHS + // implicit variable. + return CGF.EmitLValue(&DRE); +} + +LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, + const Expr *E) { + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + return LValue(); +} + +void ReductionCodeGen::emitAggregateInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), + DRD ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), + DRD, SharedLVal.getAddress()); +} + +ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, + ArrayRef Privates, + ArrayRef ReductionOps) { + ClausesData.reserve(Shareds.size()); + SharedAddresses.reserve(Shareds.size()); + Sizes.reserve(Shareds.size()); + BaseDecls.reserve(Shareds.size()); + auto IPriv = Privates.begin(); + auto IRed = ReductionOps.begin(); + for (const auto *Ref : Shareds) { + ClausesData.emplace_back(Ref, *IPriv, *IRed); + std::advance(IPriv, 1); + std::advance(IRed, 1); + } +} + +void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && + "Number of generated lvalues must be exactly N."); + SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), + emitSharedLValueUB(CGF, ClausesData[N].Ref)); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + Sizes.emplace_back( + CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()), + nullptr); + return; + } + llvm::Value *Size; + llvm::Value *SizeInChars; + llvm::Type *ElemType = + cast(SharedAddresses[N].first.getPointer()->getType()) + ->getElementType(); + auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); + if (AsArraySection) { + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), + SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); + } else { + SizeInChars = CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()); + Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); + } + 
Sizes.emplace_back(SizeInChars, Size); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, + llvm::Value *Size) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + assert(!Size && !Sizes[N].second && + "Size should be nullptr for non-variably modified redution " + "items."); + return; + } + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + llvm::function_ref DefaultInit) { + assert(SharedAddresses.size() > N && "No variable was generated"); + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + QualType PrivateType = PrivateVD->getType(); + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + QualType SharedType = SharedAddresses[N].first.getType(); + SharedLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.ConvertTypeForMem(SharedType)), + SharedType, SharedAddresses[N].first.getBaseInfo()); + if (isa(ClausesData[N].Ref) || + CGF.getContext().getAsArrayType(PrivateVD->getType())) { + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, + PrivateAddr, 
SharedLVal.getAddress(), + SharedLVal.getType()); + } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) { + CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, + PrivateVD->getType().getQualifiers(), + /*IsInitializer=*/false); + } +} + +bool ReductionCodeGen::needCleanups(unsigned N) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + return DTorKind != QualType::DK_none; +} + +void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + if (needCleanups(N)) { + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); + } +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.ConvertTypeForMem(ElTy)), + BaseLV.getType(), BaseLV.getBaseInfo()); +} + +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + llvm::Type *BaseLVType, CharUnits BaseLVAlignment, + llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + 
BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLVType; + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLVAlignment); +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + const VarDecl *OrigVD = nullptr; + if (auto *OASE = dyn_cast(ClausesData[N].Ref)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast(Base); + OrigVD = cast(DE->getDecl()); + } else if (auto *ASE = dyn_cast(ClausesData[N].Ref)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast(Base); + OrigVD = cast(DE->getDecl()); + } + if (OrigVD) { + BaseDecls.emplace_back(OrigVD); + auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue BaseLValue = + loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), + OriginalBaseLValue); + llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( + BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + llvm::Value *Ptr = + CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + return castToBase(CGF, OrigVD->getType(), + SharedAddresses[N].first.getType(), + OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAlignment(), Ptr); + 
} + BaseDecls.emplace_back( + cast(cast(ClausesData[N].Ref)->getDecl())); + return PrivateAddr; +} + +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + return DRD && DRD->getInitializer(); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -1554,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } + case OMPRTL__kmpc_task_reduction_init: { + // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); + break; + } + case OMPRTL__kmpc_task_reduction_get_th_data: { + // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -1904,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) { + llvm::Twine VarName(Name, ".artificial."); + llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); + llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value 
*Args[] = { + emitUpdateLocation(CGF, SourceLocation()), + getThreadID(CGF, SourceLocation()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), + CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, + /*IsSigned=*/false), + getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + return Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VarLVType->getPointerTo(/*AddrSpace=*/0)), + CGM.getPointerAlign()); +} + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -2699,6 +3154,8 @@ enum KmpTaskTFields { KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, + /// (Taskloops only) Reduction data. + KmpTaskTReductions, }; } // anonymous namespace @@ -3250,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; + // void * reductions; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); @@ -3273,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); } RD->completeDefinition(); return RD; @@ -3303,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, -/// tt->shareds); +/// tt->reductions, tt->shareds); /// return 0; /// } /// \endcode @@ -3389,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = 
CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); + auto RLVal = CGF.EmitLValueForField(Base, *RFI); + auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); + CallArgs.push_back(RParam); } CallArgs.push_back(SharedsParam); @@ -4155,6 +4618,16 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, cast(cast(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { UpLoc, @@ -4680,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } +/// Generates unique name for artificial threadprivate variables. +/// Format is: "." "_" +static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, + unsigned N) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; + return Out.str(); +} + +/// Emits reduction initializer function: +/// \code +/// void @.red_init(void* %arg) { +/// %0 = bitcast void* %arg to * +/// store , * %0 +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_init.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. 
+ if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + LValue SharedLVal; + // If initializer uses initializer from declare reduction construct, emit a + // pointer to the address of the original reduction item (reuired by reduction + // initializer) + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = + CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); + } else { + SharedLVal = CGF.MakeNaturalAlignAddrLValue( + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGM.getContext().VoidPtrTy); + } + // Emit the initializer: + // %0 = bitcast void* %arg to * + // store , * %0 + RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, + [](CodeGenFunction &) { return false; }); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction combiner function: +/// \code +/// void @.red_comb(void* %arg0, void* %arg1) { +/// %lhs = bitcast void* %arg0 to * +/// %rhs = bitcast void* %arg1 to * +/// %2 = (* %lhs, * %rhs) +/// store %2, * %lhs +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N, + const Expr *ReductionOp, + const Expr *LHS, const Expr *RHS, + const Expr *PrivateRef) { + auto &C = CGM.getContext(); + auto *LHSVD = cast(cast(LHS)->getDecl()); + auto *RHSVD = cast(cast(RHS)->getDecl()); + FunctionArgList Args; + ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&ParamInOut); + 
Args.emplace_back(&ParamIn); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_comb.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Remap lhs and rhs variables to the addresses of the function arguments. + // %lhs = bitcast void* %arg0 to * + // %rhs = bitcast void* %arg1 to * + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut), + C.getPointerType(C.VoidPtrTy).castAs()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); + }); + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + // Pull out the pointer to the variable. 
+ Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn), + C.getPointerType(C.VoidPtrTy).castAs()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); + }); + PrivateScope.Privatize(); + // Emit the combiner body: + // %2 = ( *%lhs, *%rhs) + // store %2, * %lhs + CGM.getOpenMPRuntime().emitSingleReductionCombiner( + CGF, ReductionOp, PrivateRef, cast(LHS), + cast(RHS)); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction finalizer function: +/// \code +/// void @.red_fini(void* %arg) { +/// %0 = bitcast void* %arg to * +/// (* %0) +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + if (!RCG.needCleanups(N)) + return nullptr; + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_fini.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. 
+ if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Emit the finalizer body: + // (* %0) + RCG.emitCleanups(CGF, N, PrivateAddr); + CGF.FinishFunction(); + return Fn; +} + +llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef LHSExprs, + ArrayRef RHSExprs, const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) + return nullptr; + + // Build typedef struct: + // kmp_task_red_input { + // void *reduce_shar; // shared reduction item + // size_t reduce_size; // size of data item + // void *reduce_init; // data initialization routine + // void *reduce_fini; // data finalization routine + // void *reduce_comb; // data combiner routine + // kmp_task_red_flags_t flags; // flags for additional info from compiler + // } kmp_task_red_input_t; + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RD->startDefinition(); + const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FlagsFD = addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); + RD->completeDefinition(); + QualType RDType = C.getRecordType(RD); + unsigned Size = Data.ReductionVars.size(); + llvm::APInt ArraySize(/*numBits=*/64, Size); + QualType ArrayRDType = C.getConstantArrayType( + RDType, ArraySize, ArrayType::Normal, 
/*IndexTypeQuals=*/0); + // kmp_task_red_input_t .rd_input.[Size]; + Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; + llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), + llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; + llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( + TaskRedInput.getPointer(), Idxs, + /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, + ".rd_input.gep."); + LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + // ElemLVal.reduce_shar = &Shareds[Cnt]; + LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); + RCG.emitSharedLValue(CGF, Cnt); + llvm::Value *CastedShared = + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + RCG.emitAggregateType(CGF, Cnt); + llvm::Value *SizeValInChars; + llvm::Value *SizeVal; + std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); + // We use delayed creation/initialization for VLAs, array sections and + // custom reduction initializations. It is required because runtime does not + // provide the way to pass the sizes of VLAs/array sections to + // initializer/combiner/finalizer functions and does not pass the pointer to + // original reduction item to the initializer. Instead threadprivate global + // variables are used to store these values and use them in the functions. 
+ bool DelayedCreation = !!SizeVal; + SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, + /*isSigned=*/false); + LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); + CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); + // ElemLVal.reduce_init = init; + LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); + llvm::Value *InitAddr = + CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + CGF.EmitStoreOfScalar(InitAddr, InitLVal); + DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); + // ElemLVal.reduce_fini = fini; + LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); + llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); + llvm::Value *FiniAddr = Fini + ? CGF.EmitCastToVoidPtr(Fini) + : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); + // ElemLVal.reduce_comb = comb; + LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); + llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], + RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + CGF.EmitStoreOfScalar(CombAddr, CombLVal); + // ElemLVal.flags = 0; + LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); + if (DelayedCreation) { + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + FlagsLVal); + } else + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + } + // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), + CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); +} + +void 
CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + auto Sizes = RCG.getSizes(N); + // Emit threadprivate global variable if the type is non-constant + // (Sizes.second = nullptr). + if (Sizes.second) { + llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, + /*isSigned=*/false); + Address SizeAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); + } + // Store address of the original reduction item if custom initializer is used. + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + SharedAddr, /*IsVolatile=*/false); + } +} + +Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), + CGM.VoidPtrTy)}; + return Address( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + SharedLVal.getAlignment()); +} + void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index 6f460f121791..5dcf999bea37 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ 
b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -96,15 +96,106 @@ struct OMPTaskDataTy final { SmallVector FirstprivateInits; SmallVector LastprivateVars; SmallVector LastprivateCopies; + SmallVector ReductionVars; + SmallVector ReductionCopies; + SmallVector ReductionOps; SmallVector, 4> Dependences; llvm::PointerIntPair Final; llvm::PointerIntPair Schedule; llvm::PointerIntPair Priority; + llvm::Value *Reductions = nullptr; unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; }; +/// Class intended to support codegen of all kind of the reduction clauses. +class ReductionCodeGen { +private: + /// Data required for codegen of reduction clauses. + struct ReductionData { + /// Reference to the original shared item. + const Expr *Ref = nullptr; + /// Helper expression for generation of private copy. + const Expr *Private = nullptr; + /// Helper expression for generation reduction operation. + const Expr *ReductionOp = nullptr; + ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) + : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + }; + /// List of reduction-based clauses. + SmallVector ClausesData; + + /// List of addresses of original shared variables/expressions. + SmallVector, 4> SharedAddresses; + /// Sizes of the reduction items in chars. + SmallVector, 4> Sizes; + /// Base declarations for the reduction items. + SmallVector BaseDecls; + + /// Emits lvalue for shared expresion. + LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); + /// Emits upper bound for shared expression (if array section). + LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); + /// Performs aggregate initialization. + /// \param N Number of reduction item in the common list. + /// \param PrivateAddr Address of the corresponding private item. + /// \param SharedLVal Address of the original shared variable. + /// \param DRD Declare reduction construct used for reduction item. 
+ void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD); + +public: + ReductionCodeGen(ArrayRef Shareds, + ArrayRef Privates, + ArrayRef ReductionOps); + /// Emits lvalue for a reduction item. + /// \param N Number of the reduction item. + void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + void emitAggregateType(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + /// \param Size Size of the type in chars. + void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size); + /// Performs initialization of the private copy for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + /// \param DefaultInit Default initialization sequence that should be + /// performed if no reduction specific initialization is found. + /// \param SharedLVal Address of the original shared variable. + void + emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, + LValue SharedLVal, + llvm::function_ref DefaultInit); + /// Returns true if the private copy requires cleanups. + bool needCleanups(unsigned N); + /// Emits cleanup code for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr); + /// Adjusts \p PrivatedAddr for using instead of the original variable + /// address in normal operations. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. 
+ Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr); + /// Returns LValue for the reduction item. + LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns the size of the reduction item (in chars and total number of + /// elements in the item), or nullptr, if the size is a constant. + std::pair getSizes(unsigned N) const { + return Sizes[N]; + } + /// Returns the base declaration of the reduction item. + const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns true if the initialization of the reduction item uses initializer + /// from declare reduction construct. + bool usesReductionInitializer(unsigned N) const; +}; + class CGOpenMPRuntime { protected: CodeGenModule &CGM; @@ -121,7 +212,7 @@ class CGOpenMPRuntime { /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Lambda codegen specific to an accelerator device. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, @@ -699,7 +790,7 @@ class CGOpenMPRuntime { /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param DispatchValues struct containing llvm values for lower bound, upper /// bound, and chunk expression. @@ -723,7 +814,7 @@ class CGOpenMPRuntime { /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. 
/// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -732,7 +823,7 @@ class CGOpenMPRuntime { /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -747,7 +838,7 @@ class CGOpenMPRuntime { /// \param Loc Clang source location. /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. @@ -756,7 +847,7 @@ class CGOpenMPRuntime { /// \param UB Address of the output variable in which the upper iteration /// number is returned. /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. + /// returned necessary to generated the static_chunked scheduled loop. /// \param Chunk Value of the chunk for the static_chunked scheduled loop. /// For the default (nullptr) value, the chunk 1 will be used. /// @@ -773,7 +864,7 @@ class CGOpenMPRuntime { /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. 
/// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, @@ -792,7 +883,7 @@ class CGOpenMPRuntime { /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, /// kmp_int[32|64] *p_stride); /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. /// \param LB Address of the output variable in which the lower iteration @@ -844,6 +935,14 @@ class CGOpenMPRuntime { SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name); + /// \brief Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef Vars, @@ -1002,6 +1101,51 @@ class CGOpenMPRuntime { ArrayRef ReductionOps, ReductionOptionsTy Options); + /// Emit a code for initialization of task reduction clause. Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = ; + /// ... 
+ /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, + SourceLocation Loc, + ArrayRef LHSExprs, + ArrayRef RHSExprs, + const OMPTaskDataTy &Data); + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows to reuse an existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N); + + /// Get the address of `void *` type of the privatue copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. + /// \param SharedLVal Address of the original reduction item. + virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal); + /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); @@ -1029,7 +1173,7 @@ class CGOpenMPRuntime { /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Code generation sequence for the \a D directive. 
- /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index 71797e2e6fbe..6135cf31d176 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -549,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. -static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast(ReductionOp)) - if (auto *OVE = dyn_cast(CE->getCallee())) - if (auto *DRE = - dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast(InitOp); - auto *OVE = cast(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast(cast(LHS)->getSubExpr()); - auto *RHSDRE = cast(cast(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = 
RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. 
- auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. 
- auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. - auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. - CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -1051,254 +901,107 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = 
CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getBaseInfo()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector Shareds; + SmallVector Privates; + SmallVector ReductionOps; + SmallVector LHSs; + SmallVector RHSs; for (const auto *C : D.getClausesOfKind()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast(cast(*ILHS)->getDecl()); - auto *RHSVD = cast(cast(*IRHS)->getDecl()); - auto *PrivateVD = cast(cast(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast(Base); - auto *OrigVD = cast(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. 
- bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. - // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast(Base); - auto *OrigVD = cast(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate( - LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast(cast(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. - DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. 
- (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } - } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); } } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast(cast(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast(cast(*ILHS)->getDecl()); + auto *RHSVD = cast(cast(*IRHS)->getDecl()); + if (isa(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. 
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (isa(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + } + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? 
Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); + } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; + } } void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -2994,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++ID; } } + SmallVector LHSs; + SmallVector RHSs; + for (const auto *C : S.getClausesOfKind()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. 
OMPPrivateScope Scope(CGF); @@ -3053,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must be removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } (void)Scope.Privatize(); Action.Enter(CGF); @@ -3714,6 +3466,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index 5933e029be8d..753dd92f3071 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -3589,12 +3589,19 @@ class CodeGenFunction : public CodeGenTypeCache { /// nonnull, if \p LHS is marked _Nonnull. void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// An enumeration which makes it easier to specify whether or not an + /// operation is a subtraction. + enum { NotSubtraction = false, IsSubtraction = true }; + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. 
llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name = ""); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 4b15b8ac4c71..5561d4520cc8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -4499,18 +4499,19 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap &FeatureMap, // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - ParsedAttr.first.insert(ParsedAttr.first.begin(), + ParsedAttr.Features.insert(ParsedAttr.Features.begin(), Target.getTargetOpts().FeaturesAsWritten.begin(), Target.getTargetOpts().FeaturesAsWritten.end()); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "") + TargetCPU = ParsedAttr.Architecture ; // Now populate the feature map, first with the TargetCPU which is either // the default or a new one from the target attribute string. Then we'll use // the passed in features (FeaturesAsWritten) along with the new ones from // the attribute. 
- Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, + ParsedAttr.Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp index 6a31dfe53d64..a6f21d8ddcfb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/MacroPPCallbacks.cpp @@ -26,8 +26,8 @@ void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II, if (MI.isFunctionLike()) { Name << '('; - if (!MI.arg_empty()) { - MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end(); + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); for (; AI + 1 != E; ++AI) { Name << (*AI)->getName(); Name << ','; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 37ecc05aa1ee..d0760b9cc2a6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -152,6 +152,9 @@ class PCHContainerGenerator : public ASTConsumer { CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; + // When building a module MainFileName is the name of the modulemap file. + CodeGenOpts.MainFileName = + LangOpts.CurrentModule.empty() ? 
MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index eeebd60a2d20..c17828974e92 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -4785,7 +4785,8 @@ class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: @@ -4823,10 +4824,14 @@ class AArch64ABIInfo : public SwiftABIInfo { Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef scalars, bool asReturnValue) const override { @@ -5332,6 +5337,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -8494,6 +8507,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == 
"darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + Kind = AArch64ABIInfo::Win64; return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index 42478013ccec..1d35d6e78cca 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -1275,6 +1275,13 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { // we were requested to print out all option names that start with "-foo". // For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only". SuggestedCompletions = Opts->findByPrefix(PassedFlags, DisableFlags); + + // We have to query the -W flags manually as they're not in the OptTable. + // TODO: Find a good way to add them to OptTable instead and then remove + // this code. + for (StringRef S : DiagnosticIDs::getDiagnosticFlags()) + if (S.startswith(PassedFlags)) + SuggestedCompletions.push_back(S); } else { // If the flag is in the form of "--autocomplete=foo,bar", we were // requested to print out all option values for "-foo" that start with diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp index e759e3ec619a..b82cc2d4fa5d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -2070,10 +2070,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (D.isUsingLTO()) { Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); - // The Darwin linker currently uses the legacy LTO API, which does not - // support LTO unit features (CFI, whole program vtable opt) under - // ThinLTO.
- if (!getToolChain().getTriple().isOSDarwin() || + // The Darwin and PS4 linkers currently use the legacy LTO API, which + // does not support LTO unit features (CFI, whole program vtable opt) + // under ThinLTO. + if (!(getToolChain().getTriple().isOSDarwin() || + getToolChain().getTriple().isPS4()) || D.getLTOMode() == LTOK_Full) CmdArgs.push_back("-flto-unit"); } @@ -3200,9 +3201,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls); Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names); - // Emulated TLS is enabled by default on Android, and can be enabled manually - // with -femulated-tls. - bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isWindowsCygwinEnvironment(); + // Emulated TLS is enabled by default on Android and OpenBSD, and can be enabled + // manually with -femulated-tls. + bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isOSOpenBSD() || + Triple.isWindowsCygwinEnvironment(); if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, EmulatedTLSDefault)) CmdArgs.push_back("-femulated-tls"); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp index ba1a5ee95594..0d63858f2cd4 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1262,28 +1262,58 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const { } } - // If no OSX or iOS target has been specified, try to guess platform - // from arch name and compute the version from the triple. + // If no OS targets have been specified, try to guess platform from -target + // or arch name and compute the version from the triple. 
if (OSXTarget.empty() && iOSTarget.empty() && TvOSTarget.empty() && WatchOSTarget.empty()) { - StringRef MachOArchName = getMachOArchName(Args); - unsigned Major, Minor, Micro; - if (MachOArchName == "armv7" || MachOArchName == "armv7s" || - MachOArchName == "arm64") { - getTriple().getiOSVersion(Major, Minor, Micro); - llvm::raw_string_ostream(iOSTarget) << Major << '.' << Minor << '.' - << Micro; - } else if (MachOArchName == "armv7k") { - getTriple().getWatchOSVersion(Major, Minor, Micro); - llvm::raw_string_ostream(WatchOSTarget) << Major << '.' << Minor << '.' - << Micro; - } else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" && - MachOArchName != "armv7em") { - if (!getTriple().getMacOSXVersion(Major, Minor, Micro)) { - getDriver().Diag(diag::err_drv_invalid_darwin_version) - << getTriple().getOSName(); + llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS; + + // Set the OSTy based on -target if -arch isn't present. + if (Args.hasArg(options::OPT_target) && !Args.hasArg(options::OPT_arch)) { + OSTy = getTriple().getOS(); + } else { + StringRef MachOArchName = getMachOArchName(Args); + if (MachOArchName == "armv7" || MachOArchName == "armv7s" || + MachOArchName == "arm64") + OSTy = llvm::Triple::IOS; + else if (MachOArchName == "armv7k") + OSTy = llvm::Triple::WatchOS; + else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" && + MachOArchName != "armv7em") + OSTy = llvm::Triple::MacOSX; + } + + + if (OSTy != llvm::Triple::UnknownOS) { + unsigned Major, Minor, Micro; + std::string *OSTarget; + + switch (OSTy) { + case llvm::Triple::Darwin: + case llvm::Triple::MacOSX: + if (!getTriple().getMacOSXVersion(Major, Minor, Micro)) + getDriver().Diag(diag::err_drv_invalid_darwin_version) + << getTriple().getOSName(); + OSTarget = &OSXTarget; + break; + case llvm::Triple::IOS: + getTriple().getiOSVersion(Major, Minor, Micro); + OSTarget = &iOSTarget; + break; + case llvm::Triple::TvOS: + getTriple().getOSVersion(Major, Minor, Micro); + OSTarget 
= &TvOSTarget; + break; + case llvm::Triple::WatchOS: + getTriple().getWatchOSVersion(Major, Minor, Micro); + OSTarget = &WatchOSTarget; + break; + default: + llvm_unreachable("Unexpected OS type"); + break; } - llvm::raw_string_ostream(OSXTarget) << Major << '.' << Minor << '.' + + llvm::raw_string_ostream(*OSTarget) << Major << '.' << Minor << '.' << Micro; } } diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp index d8b8fe8f0bfe..78053aafd16e 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -46,6 +46,9 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (llvm::sys::path::stem(Exec).equals_lower("lld")) { CmdArgs.push_back("-flavor"); CmdArgs.push_back("gnu"); + + CmdArgs.push_back("-z"); + CmdArgs.push_back("rodynamic"); } if (!D.SysRoot.empty()) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp index ad5f7df50d2e..bc26ee1de46d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2471,7 +2471,8 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, (!V.isOlderThan(4, 7, 0) || getTriple().isAndroid())) || getTriple().getOS() == llvm::Triple::NaCl || (getTriple().getVendor() == llvm::Triple::MipsTechnologies && - !getTriple().hasEnvironment()); + !getTriple().hasEnvironment()) || + getTriple().getOS() == llvm::Triple::Solaris; if (DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, UseInitArrayDefault)) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.cpp index 78797c49d7b6..de98d11b2dc7 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.cpp +++ 
b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.cpp @@ -126,7 +126,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, Solaris::Solaris(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : Generic_GCC(D, Triple, Args) { + : Generic_ELF(D, Triple, Args) { GCCInstallation.init(Triple, Args); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.h index edb44373b31d..787917afab6e 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.h +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Solaris.h @@ -50,7 +50,7 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool { namespace toolchains { -class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_GCC { +class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_ELF { public: Solaris(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index c6e90a9175e1..46ea06b880ed 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -1383,7 +1383,8 @@ class AnnotatingParser { if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, - tok::equal, tok::kw_delete, tok::kw_sizeof) || + tok::equal, tok::kw_delete, tok::kw_sizeof, + tok::kw_throw) || PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, TT_UnaryOperator, TT_CastRParen)) return TT_UnaryOperator; diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 4b57919d1929..faac5a371c26 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -747,7 +747,7 @@ static bool mustBeJSIdent(const 
AdditionalKeywords &Keywords, Keywords.kw_let, Keywords.kw_var, tok::kw_const, Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, Keywords.kw_instanceof, Keywords.kw_interface, - Keywords.kw_throws)); + Keywords.kw_throws, Keywords.kw_from)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 00f6b9b46f03..b2c14554a4b5 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -1683,6 +1683,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, Opts.CPlusPlus11 = Std.isCPlusPlus11(); Opts.CPlusPlus14 = Std.isCPlusPlus14(); Opts.CPlusPlus1z = Std.isCPlusPlus1z(); + Opts.CPlusPlus2a = Std.isCPlusPlus2a(); Opts.Digraphs = Std.hasDigraphs(); Opts.GNUMode = Std.isGNUMode(); Opts.GNUInline = !Opts.C99 && !Opts.CPlusPlus; diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp index 71420df00025..64128dfdf534 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp @@ -374,10 +374,13 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, else if (!LangOpts.GNUMode && LangOpts.Digraphs) Builder.defineMacro("__STDC_VERSION__", "199409L"); } else { + // FIXME: Use correct value for C++20. + if (LangOpts.CPlusPlus2a) + Builder.defineMacro("__cplusplus", "201707L"); // C++17 [cpp.predefined]p1: // The name __cplusplus is defined to the value 201703L when compiling a // C++ translation unit. 
- if (LangOpts.CPlusPlus1z) + else if (LangOpts.CPlusPlus1z) Builder.defineMacro("__cplusplus", "201703L"); // C++1y [cpp.predefined]p1: // The name __cplusplus is defined to the value 201402L when compiling a diff --git a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 5336de1f7468..914039ad5bb1 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -38,8 +38,8 @@ static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, if (MI.isFunctionLike()) { OS << '('; - if (!MI.arg_empty()) { - MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end(); + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); for (; AI+1 != E; ++AI) { OS << (*AI)->getName(); OS << ','; diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp index e93f737c47fd..5efa6aeaf760 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp @@ -10,6 +10,7 @@ #include "clang/Rewrite/Frontend/FrontendActions.h" #include "clang/AST/ASTConsumer.h" #include "clang/Basic/CharInfo.h" +#include "clang/Config/config.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/FrontendDiagnostic.h" diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 38be684cec86..21686b8c78ea 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -21,6 +21,7 @@ #include "clang/Basic/IdentifierTable.h" #include 
"clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Config/config.h" #include "clang/Lex/Lexer.h" #include "clang/Rewrite/Core/Rewriter.h" #include "llvm/ADT/DenseSet.h" diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index 5a1e001d65b8..e0d813df70f8 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/SourceManager.h" +#include "clang/Config/config.h" #include "clang/Lex/Lexer.h" #include "clang/Rewrite/Core/Rewriter.h" #include "llvm/ADT/DenseSet.h" diff --git a/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index a7c140188b35..166631558806 100644 --- a/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -15,6 +15,7 @@ #include "clang/FrontendTool/Utils.h" #include "clang/ARCMigrate/ARCMTActions.h" #include "clang/CodeGen/CodeGenAction.h" +#include "clang/Config/config.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" diff --git a/contrib/llvm/tools/clang/lib/Headers/vecintrin.h b/contrib/llvm/tools/clang/lib/Headers/vecintrin.h index ca7acb4731f9..f7061e88949f 100644 --- a/contrib/llvm/tools/clang/lib/Headers/vecintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/vecintrin.h @@ -116,6 +116,13 @@ vec_extract(vector unsigned long long __vec, int __index) { return __vec[__index & 1]; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai float +vec_extract(vector float __vec, int __index) { + return __vec[__index & 3]; +} +#endif + static inline __ATTRS_o_ai double 
vec_extract(vector double __vec, int __index) { return __vec[__index & 1]; @@ -129,6 +136,7 @@ vec_insert(signed char __scalar, vector signed char __vec, int __index) { return __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_insert(unsigned char __scalar, vector bool char __vec, int __index) { vector unsigned char __newvec = (vector unsigned char)__vec; @@ -148,6 +156,7 @@ vec_insert(signed short __scalar, vector signed short __vec, int __index) { return __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_insert(unsigned short __scalar, vector bool short __vec, int __index) { vector unsigned short __newvec = (vector unsigned short)__vec; @@ -167,6 +176,7 @@ vec_insert(signed int __scalar, vector signed int __vec, int __index) { return __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_insert(unsigned int __scalar, vector bool int __vec, int __index) { vector unsigned int __newvec = (vector unsigned int)__vec; @@ -187,6 +197,7 @@ vec_insert(signed long long __scalar, vector signed long long __vec, return __vec; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_insert(unsigned long long __scalar, vector bool long long __vec, int __index) { @@ -202,6 +213,14 @@ vec_insert(unsigned long long __scalar, vector unsigned long long __vec, return __vec; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_insert(float __scalar, vector float __vec, int __index) { + __vec[__index & 3] = __scalar; + return __vec; +} +#endif + static inline __ATTRS_o_ai vector double vec_insert(double __scalar, vector double __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; @@ -282,6 +301,16 @@ vec_promote(unsigned long long __scalar, int __index) { return __vec; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_promote(float __scalar, int __index) { + const vector float __zero = (vector float)0; + vector float __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); + __vec[__index & 3] = __scalar; + return __vec; +} +#endif + static inline __ATTRS_o_ai vector double vec_promote(double __scalar, int __index) { const vector double __zero = (vector double)0; @@ -348,6 +377,15 @@ vec_insert_and_zero(const unsigned long long *__ptr) { return __vec; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_insert_and_zero(const float *__ptr) { + vector float __vec = (vector float)0; + __vec[0] = *__ptr; + return __vec; +} +#endif + static inline __ATTRS_o_ai vector double vec_insert_and_zero(const double *__ptr) { vector double __vec = (vector double)0; @@ -441,6 +479,15 @@ vec_perm(vector bool long long __a, vector bool long long __b, (vector unsigned char)__a, (vector unsigned char)__b, __c); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_perm(vector float __a, vector float __b, + vector unsigned char __c) { + return (vector float)__builtin_s390_vperm( + (vector unsigned char)__a, (vector unsigned char)__b, __c); +} +#endif + static inline __ATTRS_o_ai vector double vec_perm(vector double __a, vector double __b, vector 
unsigned char __c) { @@ -450,18 +497,22 @@ vec_perm(vector double __a, vector double __b, /*-- vec_permi --------------------------------------------------------------*/ +// This prototype is deprecated. extern __ATTRS_o vector signed long long vec_permi(vector signed long long __a, vector signed long long __b, int __c) __constant_range(__c, 0, 3); +// This prototype is deprecated. extern __ATTRS_o vector unsigned long long vec_permi(vector unsigned long long __a, vector unsigned long long __b, int __c) __constant_range(__c, 0, 3); +// This prototype is deprecated. extern __ATTRS_o vector bool long long vec_permi(vector bool long long __a, vector bool long long __b, int __c) __constant_range(__c, 0, 3); +// This prototype is deprecated. extern __ATTRS_o vector double vec_permi(vector double __a, vector double __b, int __c) __constant_range(__c, 0, 3); @@ -471,6 +522,15 @@ vec_permi(vector double __a, vector double __b, int __c) (vector unsigned long long)(Y), \ (((Z) & 2) << 1) | ((Z) & 1))) +/*-- vec_bperm_u128 ---------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai vector unsigned long long +vec_bperm_u128(vector unsigned char __a, vector unsigned char __b) { + return __builtin_s390_vbperm(__a, __b); +} +#endif + /*-- vec_sel ----------------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed char @@ -614,6 +674,22 @@ vec_sel(vector unsigned long long __a, vector unsigned long long __b, (~(vector unsigned long long)__c & __a)); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_sel(vector float __a, vector float __b, vector unsigned int __c) { + return (vector float)((__c & (vector unsigned int)__b) | + (~__c & (vector unsigned int)__a)); +} + +static inline __ATTRS_o_ai vector float +vec_sel(vector float __a, vector float __b, vector bool int __c) { + vector unsigned int __ac = (vector unsigned int)__a; + vector unsigned int __bc = (vector unsigned 
int)__b; + vector unsigned int __cc = (vector unsigned int)__c; + return (vector float)((__cc & __bc) | (~__cc & __ac)); +} +#endif + static inline __ATTRS_o_ai vector double vec_sel(vector double __a, vector double __b, vector unsigned long long __c) { return (vector double)((__c & (vector unsigned long long)__b) | @@ -687,6 +763,17 @@ vec_gather_element(vector unsigned long long __vec, return __vec; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_gather_element(vector float __vec, vector unsigned int __offset, + const float *__ptr, int __index) + __constant_range(__index, 0, 3) { + __vec[__index] = *(const float *)( + (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]); + return __vec; +} +#endif + static inline __ATTRS_o_ai vector double vec_gather_element(vector double __vec, vector unsigned long long __offset, const double *__ptr, int __index) @@ -749,6 +836,16 @@ vec_scatter_element(vector unsigned long long __vec, __vec[__index]; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai void +vec_scatter_element(vector float __vec, vector unsigned int __offset, + float *__ptr, int __index) + __constant_range(__index, 0, 3) { + *(float *)((__INTPTR_TYPE__)__ptr + __offset[__index]) = + __vec[__index]; +} +#endif + static inline __ATTRS_o_ai void vec_scatter_element(vector double __vec, vector unsigned long long __offset, double *__ptr, int __index) @@ -757,48 +854,111 @@ vec_scatter_element(vector double __vec, vector unsigned long long __offset, __vec[__index]; } +/*-- vec_xl -----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_xl(long __offset, const signed char *__ptr) { + return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_xl(long __offset, const unsigned char *__ptr) { + return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector 
signed short +vec_xl(long __offset, const signed short *__ptr) { + return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_xl(long __offset, const unsigned short *__ptr) { + return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed int +vec_xl(long __offset, const signed int *__ptr) { + return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_xl(long __offset, const unsigned int *__ptr) { + return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector signed long long +vec_xl(long __offset, const signed long long *__ptr) { + return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_xl(long __offset, const unsigned long long *__ptr) { + return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset); +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_xl(long __offset, const float *__ptr) { + return *(const vector float *)((__INTPTR_TYPE__)__ptr + __offset); +} +#endif + +static inline __ATTRS_o_ai vector double +vec_xl(long __offset, const double *__ptr) { + return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset); +} + /*-- vec_xld2 ---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_xld2(long __offset, const signed char *__ptr) { return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_xld2(long __offset, const unsigned char *__ptr) { return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed short vec_xld2(long __offset, const signed short *__ptr) { return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_xld2(long __offset, const unsigned short *__ptr) { return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_xld2(long __offset, const signed int *__ptr) { return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_xld2(long __offset, const unsigned int *__ptr) { return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_xld2(long __offset, const signed long long *__ptr) { return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_xld2(long __offset, const unsigned long long *__ptr) { return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_xld2(long __offset, const double *__ptr) { return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset); @@ -806,74 +966,145 @@ vec_xld2(long __offset, const double *__ptr) { /*-- vec_xlw4 ---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_xlw4(long __offset, const signed char *__ptr) { return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned char vec_xlw4(long __offset, const unsigned char *__ptr) { return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_xlw4(long __offset, const signed short *__ptr) { return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_xlw4(long __offset, const unsigned short *__ptr) { return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_xlw4(long __offset, const signed int *__ptr) { return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_xlw4(long __offset, const unsigned int *__ptr) { return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset); } +/*-- vec_xst ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai void +vec_xst(vector signed char __vec, long __offset, signed char *__ptr) { + *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector unsigned char __vec, long __offset, unsigned char *__ptr) { + *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector signed short __vec, long __offset, signed short *__ptr) { + *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector unsigned short __vec, long __offset, unsigned short *__ptr) { + *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector signed int __vec, long __offset, signed int *__ptr) { + *(vector signed int *)((__INTPTR_TYPE__)__ptr + 
__offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector unsigned int __vec, long __offset, unsigned int *__ptr) { + *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector signed long long __vec, long __offset, + signed long long *__ptr) { + *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + +static inline __ATTRS_o_ai void +vec_xst(vector unsigned long long __vec, long __offset, + unsigned long long *__ptr) { + *(vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset) = + __vec; +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai void +vec_xst(vector float __vec, long __offset, float *__ptr) { + *(vector float *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} +#endif + +static inline __ATTRS_o_ai void +vec_xst(vector double __vec, long __offset, double *__ptr) { + *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; +} + /*-- vec_xstd2 --------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector signed char __vec, long __offset, signed char *__ptr) { *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector unsigned char __vec, long __offset, unsigned char *__ptr) { *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector signed short __vec, long __offset, signed short *__ptr) { *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector unsigned short __vec, long __offset, unsigned short *__ptr) { *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai void vec_xstd2(vector signed int __vec, long __offset, signed int *__ptr) { *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector unsigned int __vec, long __offset, unsigned int *__ptr) { *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector signed long long __vec, long __offset, signed long long *__ptr) { *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector unsigned long long __vec, long __offset, unsigned long long *__ptr) { @@ -881,6 +1112,7 @@ vec_xstd2(vector unsigned long long __vec, long __offset, __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstd2(vector double __vec, long __offset, double *__ptr) { *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; @@ -888,31 +1120,37 @@ vec_xstd2(vector double __vec, long __offset, double *__ptr) { /*-- vec_xstw4 --------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(vector signed char __vec, long __offset, signed char *__ptr) { *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(vector unsigned char __vec, long __offset, unsigned char *__ptr) { *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(vector signed short __vec, long __offset, signed short *__ptr) { *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai void vec_xstw4(vector unsigned short __vec, long __offset, unsigned short *__ptr) { *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(vector signed int __vec, long __offset, signed int *__ptr) { *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_xstw4(vector unsigned int __vec, long __offset, unsigned int *__ptr) { *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec; @@ -952,6 +1190,12 @@ extern __ATTRS_o vector unsigned long long vec_load_bndry(const unsigned long long *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); +#if __ARCH__ >= 12 +extern __ATTRS_o vector float +vec_load_bndry(const float *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); +#endif + extern __ATTRS_o vector double vec_load_bndry(const double *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); @@ -1007,11 +1251,27 @@ vec_load_len(const unsigned long long *__ptr, unsigned int __len) { return (vector unsigned long long)__builtin_s390_vll(__len, __ptr); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_load_len(const float *__ptr, unsigned int __len) { + return (vector float)__builtin_s390_vll(__len, __ptr); +} +#endif + static inline __ATTRS_o_ai vector double vec_load_len(const double *__ptr, unsigned int __len) { return (vector double)__builtin_s390_vll(__len, __ptr); } +/*-- vec_load_len_r ---------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai vector unsigned char +vec_load_len_r(const unsigned char *__ptr, unsigned int __len) { + return (vector unsigned char)__builtin_s390_vlrl(__len, __ptr); +} +#endif + /*-- vec_store_len ----------------------------------------------------------*/ static inline __ATTRS_o_ai void @@ -1062,12 +1322,30 @@ 
vec_store_len(vector unsigned long long __vec, unsigned long long *__ptr, __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai void +vec_store_len(vector float __vec, float *__ptr, + unsigned int __len) { + __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); +} +#endif + static inline __ATTRS_o_ai void vec_store_len(vector double __vec, double *__ptr, unsigned int __len) { __builtin_s390_vstl((vector signed char)__vec, __len, __ptr); } +/*-- vec_store_len_r --------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai void +vec_store_len_r(vector unsigned char __vec, unsigned char *__ptr, + unsigned int __len) { + __builtin_s390_vstrl((vector signed char)__vec, __len, __ptr); +} +#endif + /*-- vec_load_pair ----------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed long long @@ -1232,6 +1510,14 @@ vec_splat(vector unsigned long long __vec, int __index) return (vector unsigned long long)__vec[__index]; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_splat(vector float __vec, int __index) + __constant_range(__index, 0, 3) { + return (vector float)__vec[__index]; +} +#endif + static inline __ATTRS_o_ai vector double vec_splat(vector double __vec, int __index) __constant_range(__index, 0, 1) { @@ -1332,6 +1618,13 @@ vec_splats(unsigned long long __scalar) { return (vector unsigned long long)__scalar; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_splats(float __scalar) { + return (vector float)__scalar; +} +#endif + static inline __ATTRS_o_ai vector double vec_splats(double __scalar) { return (vector double)__scalar; @@ -1425,6 +1718,13 @@ vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)(__a[0], __b[0]); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_mergeh(vector float __a, vector float __b) { 
+ return (vector float)(__a[0], __b[0], __a[1], __b[1]); +} +#endif + static inline __ATTRS_o_ai vector double vec_mergeh(vector double __a, vector double __b) { return (vector double)(__a[0], __b[0]); @@ -1501,6 +1801,13 @@ vec_mergel(vector unsigned long long __a, vector unsigned long long __b) { return (vector unsigned long long)(__a[1], __b[1]); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_mergel(vector float __a, vector float __b) { + return (vector float)(__a[2], __b[2], __a[3], __b[3]); +} +#endif + static inline __ATTRS_o_ai vector double vec_mergel(vector double __a, vector double __b) { return (vector double)(__a[1], __b[1]); @@ -1866,6 +2173,13 @@ vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)(__a == __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool int +vec_cmpeq(vector float __a, vector float __b) { + return (vector bool int)(__a == __b); +} +#endif + static inline __ATTRS_o_ai vector bool long long vec_cmpeq(vector double __a, vector double __b) { return (vector bool long long)(__a == __b); @@ -1913,6 +2227,13 @@ vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)(__a >= __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool int +vec_cmpge(vector float __a, vector float __b) { + return (vector bool int)(__a >= __b); +} +#endif + static inline __ATTRS_o_ai vector bool long long vec_cmpge(vector double __a, vector double __b) { return (vector bool long long)(__a >= __b); @@ -1960,6 +2281,13 @@ vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)(__a > __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool int +vec_cmpgt(vector float __a, vector float __b) { + return (vector bool int)(__a > __b); +} +#endif + static inline __ATTRS_o_ai vector bool long long vec_cmpgt(vector double __a, vector double __b) { return (vector bool 
long long)(__a > __b); @@ -2007,6 +2335,13 @@ vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)(__a <= __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool int +vec_cmple(vector float __a, vector float __b) { + return (vector bool int)(__a <= __b); +} +#endif + static inline __ATTRS_o_ai vector bool long long vec_cmple(vector double __a, vector double __b) { return (vector bool long long)(__a <= __b); @@ -2054,6 +2389,13 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { return (vector bool long long)(__a < __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool int +vec_cmplt(vector float __a, vector float __b) { + return (vector bool int)(__a < __b); +} +#endif + static inline __ATTRS_o_ai vector bool long long vec_cmplt(vector double __a, vector double __b) { return (vector bool long long)(__a < __b); @@ -2068,6 +2410,7 @@ vec_all_eq(vector signed char __a, vector signed char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector signed char __a, vector bool char __b) { int __cc; @@ -2075,6 +2418,7 @@ vec_all_eq(vector signed char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool char __a, vector signed char __b) { int __cc; @@ -2090,6 +2434,7 @@ vec_all_eq(vector unsigned char __a, vector unsigned char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector unsigned char __a, vector bool char __b) { int __cc; @@ -2098,6 +2443,7 @@ vec_all_eq(vector unsigned char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_eq(vector bool char __a, vector unsigned char __b) { int __cc; @@ -2121,6 +2467,7 @@ vec_all_eq(vector signed short __a, vector signed short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector signed short __a, vector bool short __b) { int __cc; @@ -2128,6 +2475,7 @@ vec_all_eq(vector signed short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool short __a, vector signed short __b) { int __cc; @@ -2143,6 +2491,7 @@ vec_all_eq(vector unsigned short __a, vector unsigned short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector unsigned short __a, vector bool short __b) { int __cc; @@ -2151,6 +2500,7 @@ vec_all_eq(vector unsigned short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool short __a, vector unsigned short __b) { int __cc; @@ -2174,6 +2524,7 @@ vec_all_eq(vector signed int __a, vector signed int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector signed int __a, vector bool int __b) { int __cc; @@ -2181,6 +2532,7 @@ vec_all_eq(vector signed int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool int __a, vector signed int __b) { int __cc; @@ -2196,6 +2548,7 @@ vec_all_eq(vector unsigned int __a, vector unsigned int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector unsigned int __a, vector bool int __b) { int __cc; @@ -2204,6 +2557,7 @@ vec_all_eq(vector unsigned int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_eq(vector bool int __a, vector unsigned int __b) { int __cc; @@ -2227,6 +2581,7 @@ vec_all_eq(vector signed long long __a, vector signed long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector signed long long __a, vector bool long long __b) { int __cc; @@ -2234,6 +2589,7 @@ vec_all_eq(vector signed long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool long long __a, vector signed long long __b) { int __cc; @@ -2249,6 +2605,7 @@ vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -2257,6 +2614,7 @@ vec_all_eq(vector unsigned long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_eq(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -2273,6 +2631,15 @@ vec_all_eq(vector bool long long __a, vector bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_eq(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfcesbs(__a, __b, &__cc); + return __cc == 0; +} +#endif + static inline __ATTRS_o_ai int vec_all_eq(vector double __a, vector double __b) { int __cc; @@ -2289,6 +2656,7 @@ vec_all_ne(vector signed char __a, vector signed char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector signed char __a, vector bool char __b) { int __cc; @@ -2296,6 +2664,7 @@ vec_all_ne(vector signed char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ne(vector bool char __a, vector signed char __b) { int __cc; @@ -2311,6 +2680,7 @@ vec_all_ne(vector unsigned char __a, vector unsigned char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector unsigned char __a, vector bool char __b) { int __cc; @@ -2319,6 +2689,7 @@ vec_all_ne(vector unsigned char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector bool char __a, vector unsigned char __b) { int __cc; @@ -2342,6 +2713,7 @@ vec_all_ne(vector signed short __a, vector signed short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector signed short __a, vector bool short __b) { int __cc; @@ -2349,6 +2721,7 @@ vec_all_ne(vector signed short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector bool short __a, vector signed short __b) { int __cc; @@ -2364,6 +2737,7 @@ vec_all_ne(vector unsigned short __a, vector unsigned short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector unsigned short __a, vector bool short __b) { int __cc; @@ -2372,6 +2746,7 @@ vec_all_ne(vector unsigned short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector bool short __a, vector unsigned short __b) { int __cc; @@ -2395,6 +2770,7 @@ vec_all_ne(vector signed int __a, vector signed int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector signed int __a, vector bool int __b) { int __cc; @@ -2402,6 +2778,7 @@ vec_all_ne(vector signed int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ne(vector bool int __a, vector signed int __b) { int __cc; @@ -2417,6 +2794,7 @@ vec_all_ne(vector unsigned int __a, vector unsigned int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector unsigned int __a, vector bool int __b) { int __cc; @@ -2425,6 +2803,7 @@ vec_all_ne(vector unsigned int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector bool int __a, vector unsigned int __b) { int __cc; @@ -2448,6 +2827,7 @@ vec_all_ne(vector signed long long __a, vector signed long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector signed long long __a, vector bool long long __b) { int __cc; @@ -2455,6 +2835,7 @@ vec_all_ne(vector signed long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector bool long long __a, vector signed long long __b) { int __cc; @@ -2470,6 +2851,7 @@ vec_all_ne(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ne(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -2478,6 +2860,7 @@ vec_all_ne(vector unsigned long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ne(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -2494,6 +2877,15 @@ vec_all_ne(vector bool long long __a, vector bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_ne(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfcesbs(__a, __b, &__cc); + return __cc == 3; +} +#endif + static inline __ATTRS_o_ai int vec_all_ne(vector double __a, vector double __b) { int __cc; @@ -2510,6 +2902,7 @@ vec_all_ge(vector signed char __a, vector signed char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector signed char __a, vector bool char __b) { int __cc; @@ -2517,6 +2910,7 @@ vec_all_ge(vector signed char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool char __a, vector signed char __b) { int __cc; @@ -2531,6 +2925,7 @@ vec_all_ge(vector unsigned char __a, vector unsigned char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector unsigned char __a, vector bool char __b) { int __cc; @@ -2538,6 +2933,7 @@ vec_all_ge(vector unsigned char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool char __a, vector unsigned char __b) { int __cc; @@ -2545,6 +2941,7 @@ vec_all_ge(vector bool char __a, vector unsigned char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool char __a, vector bool char __b) { int __cc; @@ -2560,6 +2957,7 @@ vec_all_ge(vector signed short __a, vector signed short __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ge(vector signed short __a, vector bool short __b) { int __cc; @@ -2567,6 +2965,7 @@ vec_all_ge(vector signed short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool short __a, vector signed short __b) { int __cc; @@ -2581,6 +2980,7 @@ vec_all_ge(vector unsigned short __a, vector unsigned short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector unsigned short __a, vector bool short __b) { int __cc; @@ -2588,6 +2988,7 @@ vec_all_ge(vector unsigned short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool short __a, vector unsigned short __b) { int __cc; @@ -2595,6 +2996,7 @@ vec_all_ge(vector bool short __a, vector unsigned short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool short __a, vector bool short __b) { int __cc; @@ -2610,6 +3012,7 @@ vec_all_ge(vector signed int __a, vector signed int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector signed int __a, vector bool int __b) { int __cc; @@ -2617,6 +3020,7 @@ vec_all_ge(vector signed int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool int __a, vector signed int __b) { int __cc; @@ -2631,6 +3035,7 @@ vec_all_ge(vector unsigned int __a, vector unsigned int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector unsigned int __a, vector bool int __b) { int __cc; @@ -2638,6 +3043,7 @@ vec_all_ge(vector unsigned int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ge(vector bool int __a, vector unsigned int __b) { int __cc; @@ -2645,6 +3051,7 @@ vec_all_ge(vector bool int __a, vector unsigned int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool int __a, vector bool int __b) { int __cc; @@ -2660,6 +3067,7 @@ vec_all_ge(vector signed long long __a, vector signed long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector signed long long __a, vector bool long long __b) { int __cc; @@ -2667,6 +3075,7 @@ vec_all_ge(vector signed long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool long long __a, vector signed long long __b) { int __cc; @@ -2681,6 +3090,7 @@ vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -2688,6 +3098,7 @@ vec_all_ge(vector unsigned long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_ge(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -2695,6 +3106,7 @@ vec_all_ge(vector bool long long __a, vector unsigned long long __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_ge(vector bool long long __a, vector bool long long __b) { int __cc; @@ -2703,6 +3115,15 @@ vec_all_ge(vector bool long long __a, vector bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_ge(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__a, __b, &__cc); + return __cc == 0; +} +#endif + static inline __ATTRS_o_ai int vec_all_ge(vector double __a, vector double __b) { int __cc; @@ -2719,6 +3140,7 @@ vec_all_gt(vector signed char __a, vector signed char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector signed char __a, vector bool char __b) { int __cc; @@ -2726,6 +3148,7 @@ vec_all_gt(vector signed char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool char __a, vector signed char __b) { int __cc; @@ -2740,6 +3163,7 @@ vec_all_gt(vector unsigned char __a, vector unsigned char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector unsigned char __a, vector bool char __b) { int __cc; @@ -2747,6 +3171,7 @@ vec_all_gt(vector unsigned char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool char __a, vector unsigned char __b) { int __cc; @@ -2754,6 +3179,7 @@ vec_all_gt(vector bool char __a, vector unsigned char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool char __a, vector bool char __b) { int __cc; @@ -2769,6 +3195,7 @@ vec_all_gt(vector signed short __a, vector signed short __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_gt(vector signed short __a, vector bool short __b) { int __cc; @@ -2776,6 +3203,7 @@ vec_all_gt(vector signed short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool short __a, vector signed short __b) { int __cc; @@ -2790,6 +3218,7 @@ vec_all_gt(vector unsigned short __a, vector unsigned short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector unsigned short __a, vector bool short __b) { int __cc; @@ -2797,6 +3226,7 @@ vec_all_gt(vector unsigned short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool short __a, vector unsigned short __b) { int __cc; @@ -2804,6 +3234,7 @@ vec_all_gt(vector bool short __a, vector unsigned short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool short __a, vector bool short __b) { int __cc; @@ -2819,6 +3250,7 @@ vec_all_gt(vector signed int __a, vector signed int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector signed int __a, vector bool int __b) { int __cc; @@ -2826,6 +3258,7 @@ vec_all_gt(vector signed int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool int __a, vector signed int __b) { int __cc; @@ -2840,6 +3273,7 @@ vec_all_gt(vector unsigned int __a, vector unsigned int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector unsigned int __a, vector bool int __b) { int __cc; @@ -2847,6 +3281,7 @@ vec_all_gt(vector unsigned int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_gt(vector bool int __a, vector unsigned int __b) { int __cc; @@ -2854,6 +3289,7 @@ vec_all_gt(vector bool int __a, vector unsigned int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool int __a, vector bool int __b) { int __cc; @@ -2869,6 +3305,7 @@ vec_all_gt(vector signed long long __a, vector signed long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector signed long long __a, vector bool long long __b) { int __cc; @@ -2876,6 +3313,7 @@ vec_all_gt(vector signed long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool long long __a, vector signed long long __b) { int __cc; @@ -2890,6 +3328,7 @@ vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -2897,6 +3336,7 @@ vec_all_gt(vector unsigned long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_gt(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -2904,6 +3344,7 @@ vec_all_gt(vector bool long long __a, vector unsigned long long __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_gt(vector bool long long __a, vector bool long long __b) { int __cc; @@ -2912,6 +3353,15 @@ vec_all_gt(vector bool long long __a, vector bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_gt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__a, __b, &__cc); + return __cc == 0; +} +#endif + static inline __ATTRS_o_ai int vec_all_gt(vector double __a, vector double __b) { int __cc; @@ -2928,6 +3378,7 @@ vec_all_le(vector signed char __a, vector signed char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector signed char __a, vector bool char __b) { int __cc; @@ -2935,6 +3386,7 @@ vec_all_le(vector signed char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool char __a, vector signed char __b) { int __cc; @@ -2949,6 +3401,7 @@ vec_all_le(vector unsigned char __a, vector unsigned char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector unsigned char __a, vector bool char __b) { int __cc; @@ -2956,6 +3409,7 @@ vec_all_le(vector unsigned char __a, vector bool char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool char __a, vector unsigned char __b) { int __cc; @@ -2963,6 +3417,7 @@ vec_all_le(vector bool char __a, vector unsigned char __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool char __a, vector bool char __b) { int __cc; @@ -2978,6 +3433,7 @@ vec_all_le(vector signed short __a, vector signed short __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_le(vector signed short __a, vector bool short __b) { int __cc; @@ -2985,6 +3441,7 @@ vec_all_le(vector signed short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool short __a, vector signed short __b) { int __cc; @@ -2999,6 +3456,7 @@ vec_all_le(vector unsigned short __a, vector unsigned short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector unsigned short __a, vector bool short __b) { int __cc; @@ -3006,6 +3464,7 @@ vec_all_le(vector unsigned short __a, vector bool short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool short __a, vector unsigned short __b) { int __cc; @@ -3013,6 +3472,7 @@ vec_all_le(vector bool short __a, vector unsigned short __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool short __a, vector bool short __b) { int __cc; @@ -3028,6 +3488,7 @@ vec_all_le(vector signed int __a, vector signed int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector signed int __a, vector bool int __b) { int __cc; @@ -3035,6 +3496,7 @@ vec_all_le(vector signed int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool int __a, vector signed int __b) { int __cc; @@ -3049,6 +3511,7 @@ vec_all_le(vector unsigned int __a, vector unsigned int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector unsigned int __a, vector bool int __b) { int __cc; @@ -3056,6 +3519,7 @@ vec_all_le(vector unsigned int __a, vector bool int __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_le(vector bool int __a, vector unsigned int __b) { int __cc; @@ -3063,6 +3527,7 @@ vec_all_le(vector bool int __a, vector unsigned int __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool int __a, vector bool int __b) { int __cc; @@ -3078,6 +3543,7 @@ vec_all_le(vector signed long long __a, vector signed long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector signed long long __a, vector bool long long __b) { int __cc; @@ -3085,6 +3551,7 @@ vec_all_le(vector signed long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool long long __a, vector signed long long __b) { int __cc; @@ -3099,6 +3566,7 @@ vec_all_le(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -3106,6 +3574,7 @@ vec_all_le(vector unsigned long long __a, vector bool long long __b) { return __cc == 3; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_le(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -3113,6 +3582,7 @@ vec_all_le(vector bool long long __a, vector unsigned long long __b) { return __cc == 3; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_le(vector bool long long __a, vector bool long long __b) { int __cc; @@ -3121,6 +3591,15 @@ vec_all_le(vector bool long long __a, vector bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_le(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__b, __a, &__cc); + return __cc == 0; +} +#endif + static inline __ATTRS_o_ai int vec_all_le(vector double __a, vector double __b) { int __cc; @@ -3137,6 +3616,7 @@ vec_all_lt(vector signed char __a, vector signed char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector signed char __a, vector bool char __b) { int __cc; @@ -3144,6 +3624,7 @@ vec_all_lt(vector signed char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool char __a, vector signed char __b) { int __cc; @@ -3158,6 +3639,7 @@ vec_all_lt(vector unsigned char __a, vector unsigned char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector unsigned char __a, vector bool char __b) { int __cc; @@ -3165,6 +3647,7 @@ vec_all_lt(vector unsigned char __a, vector bool char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool char __a, vector unsigned char __b) { int __cc; @@ -3172,6 +3655,7 @@ vec_all_lt(vector bool char __a, vector unsigned char __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool char __a, vector bool char __b) { int __cc; @@ -3187,6 +3671,7 @@ vec_all_lt(vector signed short __a, vector signed short __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_lt(vector signed short __a, vector bool short __b) { int __cc; @@ -3194,6 +3679,7 @@ vec_all_lt(vector signed short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool short __a, vector signed short __b) { int __cc; @@ -3208,6 +3694,7 @@ vec_all_lt(vector unsigned short __a, vector unsigned short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector unsigned short __a, vector bool short __b) { int __cc; @@ -3215,6 +3702,7 @@ vec_all_lt(vector unsigned short __a, vector bool short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool short __a, vector unsigned short __b) { int __cc; @@ -3222,6 +3710,7 @@ vec_all_lt(vector bool short __a, vector unsigned short __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool short __a, vector bool short __b) { int __cc; @@ -3237,6 +3726,7 @@ vec_all_lt(vector signed int __a, vector signed int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector signed int __a, vector bool int __b) { int __cc; @@ -3244,6 +3734,7 @@ vec_all_lt(vector signed int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool int __a, vector signed int __b) { int __cc; @@ -3258,6 +3749,7 @@ vec_all_lt(vector unsigned int __a, vector unsigned int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector unsigned int __a, vector bool int __b) { int __cc; @@ -3265,6 +3757,7 @@ vec_all_lt(vector unsigned int __a, vector bool int __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_lt(vector bool int __a, vector unsigned int __b) { int __cc; @@ -3272,6 +3765,7 @@ vec_all_lt(vector bool int __a, vector unsigned int __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool int __a, vector bool int __b) { int __cc; @@ -3287,6 +3781,7 @@ vec_all_lt(vector signed long long __a, vector signed long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector signed long long __a, vector bool long long __b) { int __cc; @@ -3294,6 +3789,7 @@ vec_all_lt(vector signed long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool long long __a, vector signed long long __b) { int __cc; @@ -3308,6 +3804,7 @@ vec_all_lt(vector unsigned long long __a, vector unsigned long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -3315,6 +3812,7 @@ vec_all_lt(vector unsigned long long __a, vector bool long long __b) { return __cc == 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_all_lt(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -3322,6 +3820,7 @@ vec_all_lt(vector bool long long __a, vector unsigned long long __b) { return __cc == 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_all_lt(vector bool long long __a, vector bool long long __b) { int __cc; @@ -3330,6 +3829,15 @@ vec_all_lt(vector bool long long __a, vector bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_lt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__b, __a, &__cc); + return __cc == 0; +} +#endif + static inline __ATTRS_o_ai int vec_all_lt(vector double __a, vector double __b) { int __cc; @@ -3339,7 +3847,16 @@ vec_all_lt(vector double __a, vector double __b) { /*-- vec_all_nge ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_nge(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__a, __b, &__cc); + return __cc == 3; +} +#endif + +static inline __ATTRS_o_ai int vec_all_nge(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); @@ -3348,7 +3865,16 @@ vec_all_nge(vector double __a, vector double __b) { /*-- vec_all_ngt ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_ngt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__a, __b, &__cc); + return __cc == 3; +} +#endif + +static inline __ATTRS_o_ai int vec_all_ngt(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); @@ -3357,7 +3883,16 @@ vec_all_ngt(vector double __a, vector double __b) { /*-- vec_all_nle ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_nle(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__b, __a, &__cc); + return __cc == 3; +} +#endif + +static inline __ATTRS_o_ai int vec_all_nle(vector double __a, vector double __b) { int __cc; 
__builtin_s390_vfchedbs(__b, __a, &__cc); @@ -3366,7 +3901,16 @@ vec_all_nle(vector double __a, vector double __b) { /*-- vec_all_nlt ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_nlt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__b, __a, &__cc); + return __cc == 3; +} +#endif + +static inline __ATTRS_o_ai int vec_all_nlt(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); @@ -3375,7 +3919,16 @@ vec_all_nlt(vector double __a, vector double __b) { /*-- vec_all_nan ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_nan(vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc == 0; +} +#endif + +static inline __ATTRS_o_ai int vec_all_nan(vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); @@ -3384,7 +3937,16 @@ vec_all_nan(vector double __a) { /*-- vec_all_numeric --------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_all_numeric(vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc == 3; +} +#endif + +static inline __ATTRS_o_ai int vec_all_numeric(vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); @@ -3400,6 +3962,7 @@ vec_any_eq(vector signed char __a, vector signed char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector signed char __a, vector bool char __b) { int __cc; @@ -3407,6 +3970,7 @@ vec_any_eq(vector signed char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_eq(vector bool char __a, vector signed char __b) { int __cc; @@ -3422,6 +3986,7 @@ vec_any_eq(vector unsigned char __a, vector unsigned char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector unsigned char __a, vector bool char __b) { int __cc; @@ -3430,6 +3995,7 @@ vec_any_eq(vector unsigned char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector bool char __a, vector unsigned char __b) { int __cc; @@ -3453,6 +4019,7 @@ vec_any_eq(vector signed short __a, vector signed short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector signed short __a, vector bool short __b) { int __cc; @@ -3460,6 +4027,7 @@ vec_any_eq(vector signed short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector bool short __a, vector signed short __b) { int __cc; @@ -3475,6 +4043,7 @@ vec_any_eq(vector unsigned short __a, vector unsigned short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector unsigned short __a, vector bool short __b) { int __cc; @@ -3483,6 +4052,7 @@ vec_any_eq(vector unsigned short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector bool short __a, vector unsigned short __b) { int __cc; @@ -3506,6 +4076,7 @@ vec_any_eq(vector signed int __a, vector signed int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector signed int __a, vector bool int __b) { int __cc; @@ -3513,6 +4084,7 @@ vec_any_eq(vector signed int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_eq(vector bool int __a, vector signed int __b) { int __cc; @@ -3528,6 +4100,7 @@ vec_any_eq(vector unsigned int __a, vector unsigned int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector unsigned int __a, vector bool int __b) { int __cc; @@ -3536,6 +4109,7 @@ vec_any_eq(vector unsigned int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector bool int __a, vector unsigned int __b) { int __cc; @@ -3559,6 +4133,7 @@ vec_any_eq(vector signed long long __a, vector signed long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector signed long long __a, vector bool long long __b) { int __cc; @@ -3566,6 +4141,7 @@ vec_any_eq(vector signed long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector bool long long __a, vector signed long long __b) { int __cc; @@ -3581,6 +4157,7 @@ vec_any_eq(vector unsigned long long __a, vector unsigned long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_eq(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -3589,6 +4166,7 @@ vec_any_eq(vector unsigned long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_eq(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -3605,6 +4183,15 @@ vec_any_eq(vector bool long long __a, vector bool long long __b) { return __cc <= 1; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_eq(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfcesbs(__a, __b, &__cc); + return __cc <= 1; +} +#endif + static inline __ATTRS_o_ai int vec_any_eq(vector double __a, vector double __b) { int __cc; @@ -3621,6 +4208,7 @@ vec_any_ne(vector signed char __a, vector signed char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector signed char __a, vector bool char __b) { int __cc; @@ -3628,6 +4216,7 @@ vec_any_ne(vector signed char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool char __a, vector signed char __b) { int __cc; @@ -3643,6 +4232,7 @@ vec_any_ne(vector unsigned char __a, vector unsigned char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector unsigned char __a, vector bool char __b) { int __cc; @@ -3651,6 +4241,7 @@ vec_any_ne(vector unsigned char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool char __a, vector unsigned char __b) { int __cc; @@ -3674,6 +4265,7 @@ vec_any_ne(vector signed short __a, vector signed short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector signed short __a, vector bool short __b) { int __cc; @@ -3681,6 +4273,7 @@ vec_any_ne(vector signed short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ne(vector bool short __a, vector signed short __b) { int __cc; @@ -3696,6 +4289,7 @@ vec_any_ne(vector unsigned short __a, vector unsigned short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector unsigned short __a, vector bool short __b) { int __cc; @@ -3704,6 +4298,7 @@ vec_any_ne(vector unsigned short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool short __a, vector unsigned short __b) { int __cc; @@ -3727,6 +4322,7 @@ vec_any_ne(vector signed int __a, vector signed int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector signed int __a, vector bool int __b) { int __cc; @@ -3734,6 +4330,7 @@ vec_any_ne(vector signed int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool int __a, vector signed int __b) { int __cc; @@ -3749,6 +4346,7 @@ vec_any_ne(vector unsigned int __a, vector unsigned int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector unsigned int __a, vector bool int __b) { int __cc; @@ -3757,6 +4355,7 @@ vec_any_ne(vector unsigned int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool int __a, vector unsigned int __b) { int __cc; @@ -3780,6 +4379,7 @@ vec_any_ne(vector signed long long __a, vector signed long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector signed long long __a, vector bool long long __b) { int __cc; @@ -3787,6 +4387,7 @@ vec_any_ne(vector signed long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ne(vector bool long long __a, vector signed long long __b) { int __cc; @@ -3802,6 +4403,7 @@ vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -3810,6 +4412,7 @@ vec_any_ne(vector unsigned long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ne(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -3826,6 +4429,15 @@ vec_any_ne(vector bool long long __a, vector bool long long __b) { return __cc != 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_ne(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfcesbs(__a, __b, &__cc); + return __cc != 0; +} +#endif + static inline __ATTRS_o_ai int vec_any_ne(vector double __a, vector double __b) { int __cc; @@ -3842,6 +4454,7 @@ vec_any_ge(vector signed char __a, vector signed char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector signed char __a, vector bool char __b) { int __cc; @@ -3849,6 +4462,7 @@ vec_any_ge(vector signed char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool char __a, vector signed char __b) { int __cc; @@ -3863,6 +4477,7 @@ vec_any_ge(vector unsigned char __a, vector unsigned char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector unsigned char __a, vector bool char __b) { int __cc; @@ -3870,6 +4485,7 @@ vec_any_ge(vector unsigned char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ge(vector bool char __a, vector unsigned char __b) { int __cc; @@ -3877,6 +4493,7 @@ vec_any_ge(vector bool char __a, vector unsigned char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool char __a, vector bool char __b) { int __cc; @@ -3892,6 +4509,7 @@ vec_any_ge(vector signed short __a, vector signed short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector signed short __a, vector bool short __b) { int __cc; @@ -3899,6 +4517,7 @@ vec_any_ge(vector signed short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool short __a, vector signed short __b) { int __cc; @@ -3913,6 +4532,7 @@ vec_any_ge(vector unsigned short __a, vector unsigned short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector unsigned short __a, vector bool short __b) { int __cc; @@ -3920,6 +4540,7 @@ vec_any_ge(vector unsigned short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool short __a, vector unsigned short __b) { int __cc; @@ -3927,6 +4548,7 @@ vec_any_ge(vector bool short __a, vector unsigned short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool short __a, vector bool short __b) { int __cc; @@ -3942,6 +4564,7 @@ vec_any_ge(vector signed int __a, vector signed int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector signed int __a, vector bool int __b) { int __cc; @@ -3949,6 +4572,7 @@ vec_any_ge(vector signed int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ge(vector bool int __a, vector signed int __b) { int __cc; @@ -3963,6 +4587,7 @@ vec_any_ge(vector unsigned int __a, vector unsigned int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector unsigned int __a, vector bool int __b) { int __cc; @@ -3970,6 +4595,7 @@ vec_any_ge(vector unsigned int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool int __a, vector unsigned int __b) { int __cc; @@ -3977,6 +4603,7 @@ vec_any_ge(vector bool int __a, vector unsigned int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool int __a, vector bool int __b) { int __cc; @@ -3992,6 +4619,7 @@ vec_any_ge(vector signed long long __a, vector signed long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector signed long long __a, vector bool long long __b) { int __cc; @@ -3999,6 +4627,7 @@ vec_any_ge(vector signed long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool long long __a, vector signed long long __b) { int __cc; @@ -4013,6 +4642,7 @@ vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -4020,6 +4650,7 @@ vec_any_ge(vector unsigned long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_ge(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -4027,6 +4658,7 @@ vec_any_ge(vector bool long long __a, vector unsigned long long __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_ge(vector bool long long __a, vector bool long long __b) { int __cc; @@ -4035,6 +4667,15 @@ vec_any_ge(vector bool long long __a, vector bool long long __b) { return __cc != 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_ge(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__a, __b, &__cc); + return __cc <= 1; +} +#endif + static inline __ATTRS_o_ai int vec_any_ge(vector double __a, vector double __b) { int __cc; @@ -4051,6 +4692,7 @@ vec_any_gt(vector signed char __a, vector signed char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector signed char __a, vector bool char __b) { int __cc; @@ -4058,6 +4700,7 @@ vec_any_gt(vector signed char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool char __a, vector signed char __b) { int __cc; @@ -4072,6 +4715,7 @@ vec_any_gt(vector unsigned char __a, vector unsigned char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector unsigned char __a, vector bool char __b) { int __cc; @@ -4079,6 +4723,7 @@ vec_any_gt(vector unsigned char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool char __a, vector unsigned char __b) { int __cc; @@ -4086,6 +4731,7 @@ vec_any_gt(vector bool char __a, vector unsigned char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool char __a, vector bool char __b) { int __cc; @@ -4101,6 +4747,7 @@ vec_any_gt(vector signed short __a, vector signed short __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_gt(vector signed short __a, vector bool short __b) { int __cc; @@ -4108,6 +4755,7 @@ vec_any_gt(vector signed short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool short __a, vector signed short __b) { int __cc; @@ -4122,6 +4770,7 @@ vec_any_gt(vector unsigned short __a, vector unsigned short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector unsigned short __a, vector bool short __b) { int __cc; @@ -4129,6 +4778,7 @@ vec_any_gt(vector unsigned short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool short __a, vector unsigned short __b) { int __cc; @@ -4136,6 +4786,7 @@ vec_any_gt(vector bool short __a, vector unsigned short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool short __a, vector bool short __b) { int __cc; @@ -4151,6 +4802,7 @@ vec_any_gt(vector signed int __a, vector signed int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector signed int __a, vector bool int __b) { int __cc; @@ -4158,6 +4810,7 @@ vec_any_gt(vector signed int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool int __a, vector signed int __b) { int __cc; @@ -4172,6 +4825,7 @@ vec_any_gt(vector unsigned int __a, vector unsigned int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector unsigned int __a, vector bool int __b) { int __cc; @@ -4179,6 +4833,7 @@ vec_any_gt(vector unsigned int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_gt(vector bool int __a, vector unsigned int __b) { int __cc; @@ -4186,6 +4841,7 @@ vec_any_gt(vector bool int __a, vector unsigned int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool int __a, vector bool int __b) { int __cc; @@ -4201,6 +4857,7 @@ vec_any_gt(vector signed long long __a, vector signed long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector signed long long __a, vector bool long long __b) { int __cc; @@ -4208,6 +4865,7 @@ vec_any_gt(vector signed long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool long long __a, vector signed long long __b) { int __cc; @@ -4222,6 +4880,7 @@ vec_any_gt(vector unsigned long long __a, vector unsigned long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -4229,6 +4888,7 @@ vec_any_gt(vector unsigned long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_gt(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -4236,6 +4896,7 @@ vec_any_gt(vector bool long long __a, vector unsigned long long __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_gt(vector bool long long __a, vector bool long long __b) { int __cc; @@ -4244,6 +4905,15 @@ vec_any_gt(vector bool long long __a, vector bool long long __b) { return __cc <= 1; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_gt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__a, __b, &__cc); + return __cc <= 1; +} +#endif + static inline __ATTRS_o_ai int vec_any_gt(vector double __a, vector double __b) { int __cc; @@ -4260,6 +4930,7 @@ vec_any_le(vector signed char __a, vector signed char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector signed char __a, vector bool char __b) { int __cc; @@ -4267,6 +4938,7 @@ vec_any_le(vector signed char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool char __a, vector signed char __b) { int __cc; @@ -4281,6 +4953,7 @@ vec_any_le(vector unsigned char __a, vector unsigned char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector unsigned char __a, vector bool char __b) { int __cc; @@ -4288,6 +4961,7 @@ vec_any_le(vector unsigned char __a, vector bool char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool char __a, vector unsigned char __b) { int __cc; @@ -4295,6 +4969,7 @@ vec_any_le(vector bool char __a, vector unsigned char __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool char __a, vector bool char __b) { int __cc; @@ -4310,6 +4985,7 @@ vec_any_le(vector signed short __a, vector signed short __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_le(vector signed short __a, vector bool short __b) { int __cc; @@ -4317,6 +4993,7 @@ vec_any_le(vector signed short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool short __a, vector signed short __b) { int __cc; @@ -4331,6 +5008,7 @@ vec_any_le(vector unsigned short __a, vector unsigned short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector unsigned short __a, vector bool short __b) { int __cc; @@ -4338,6 +5016,7 @@ vec_any_le(vector unsigned short __a, vector bool short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool short __a, vector unsigned short __b) { int __cc; @@ -4345,6 +5024,7 @@ vec_any_le(vector bool short __a, vector unsigned short __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool short __a, vector bool short __b) { int __cc; @@ -4360,6 +5040,7 @@ vec_any_le(vector signed int __a, vector signed int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector signed int __a, vector bool int __b) { int __cc; @@ -4367,6 +5048,7 @@ vec_any_le(vector signed int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool int __a, vector signed int __b) { int __cc; @@ -4381,6 +5063,7 @@ vec_any_le(vector unsigned int __a, vector unsigned int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector unsigned int __a, vector bool int __b) { int __cc; @@ -4388,6 +5071,7 @@ vec_any_le(vector unsigned int __a, vector bool int __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_le(vector bool int __a, vector unsigned int __b) { int __cc; @@ -4395,6 +5079,7 @@ vec_any_le(vector bool int __a, vector unsigned int __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool int __a, vector bool int __b) { int __cc; @@ -4410,6 +5095,7 @@ vec_any_le(vector signed long long __a, vector signed long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector signed long long __a, vector bool long long __b) { int __cc; @@ -4417,6 +5103,7 @@ vec_any_le(vector signed long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool long long __a, vector signed long long __b) { int __cc; @@ -4431,6 +5118,7 @@ vec_any_le(vector unsigned long long __a, vector unsigned long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -4438,6 +5126,7 @@ vec_any_le(vector unsigned long long __a, vector bool long long __b) { return __cc != 0; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_le(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -4445,6 +5134,7 @@ vec_any_le(vector bool long long __a, vector unsigned long long __b) { return __cc != 0; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_le(vector bool long long __a, vector bool long long __b) { int __cc; @@ -4453,6 +5143,15 @@ vec_any_le(vector bool long long __a, vector bool long long __b) { return __cc != 0; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_le(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__b, __a, &__cc); + return __cc <= 1; +} +#endif + static inline __ATTRS_o_ai int vec_any_le(vector double __a, vector double __b) { int __cc; @@ -4469,6 +5168,7 @@ vec_any_lt(vector signed char __a, vector signed char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector signed char __a, vector bool char __b) { int __cc; @@ -4476,6 +5176,7 @@ vec_any_lt(vector signed char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool char __a, vector signed char __b) { int __cc; @@ -4490,6 +5191,7 @@ vec_any_lt(vector unsigned char __a, vector unsigned char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector unsigned char __a, vector bool char __b) { int __cc; @@ -4497,6 +5199,7 @@ vec_any_lt(vector unsigned char __a, vector bool char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool char __a, vector unsigned char __b) { int __cc; @@ -4504,6 +5207,7 @@ vec_any_lt(vector bool char __a, vector unsigned char __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool char __a, vector bool char __b) { int __cc; @@ -4519,6 +5223,7 @@ vec_any_lt(vector signed short __a, vector signed short __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_lt(vector signed short __a, vector bool short __b) { int __cc; @@ -4526,6 +5231,7 @@ vec_any_lt(vector signed short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool short __a, vector signed short __b) { int __cc; @@ -4540,6 +5246,7 @@ vec_any_lt(vector unsigned short __a, vector unsigned short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector unsigned short __a, vector bool short __b) { int __cc; @@ -4547,6 +5254,7 @@ vec_any_lt(vector unsigned short __a, vector bool short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool short __a, vector unsigned short __b) { int __cc; @@ -4554,6 +5262,7 @@ vec_any_lt(vector bool short __a, vector unsigned short __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool short __a, vector bool short __b) { int __cc; @@ -4569,6 +5278,7 @@ vec_any_lt(vector signed int __a, vector signed int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector signed int __a, vector bool int __b) { int __cc; @@ -4576,6 +5286,7 @@ vec_any_lt(vector signed int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool int __a, vector signed int __b) { int __cc; @@ -4590,6 +5301,7 @@ vec_any_lt(vector unsigned int __a, vector unsigned int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector unsigned int __a, vector bool int __b) { int __cc; @@ -4597,6 +5309,7 @@ vec_any_lt(vector unsigned int __a, vector bool int __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_lt(vector bool int __a, vector unsigned int __b) { int __cc; @@ -4604,6 +5317,7 @@ vec_any_lt(vector bool int __a, vector unsigned int __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool int __a, vector bool int __b) { int __cc; @@ -4619,6 +5333,7 @@ vec_any_lt(vector signed long long __a, vector signed long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector signed long long __a, vector bool long long __b) { int __cc; @@ -4626,6 +5341,7 @@ vec_any_lt(vector signed long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool long long __a, vector signed long long __b) { int __cc; @@ -4640,6 +5356,7 @@ vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector unsigned long long __a, vector bool long long __b) { int __cc; @@ -4647,6 +5364,7 @@ vec_any_lt(vector unsigned long long __a, vector bool long long __b) { return __cc <= 1; } +// This prototype is deprecated. static inline __ATTRS_o_ai int vec_any_lt(vector bool long long __a, vector unsigned long long __b) { int __cc; @@ -4654,6 +5372,7 @@ vec_any_lt(vector bool long long __a, vector unsigned long long __b) { return __cc <= 1; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai int vec_any_lt(vector bool long long __a, vector bool long long __b) { int __cc; @@ -4662,6 +5381,15 @@ vec_any_lt(vector bool long long __a, vector bool long long __b) { return __cc <= 1; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_lt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__b, __a, &__cc); + return __cc <= 1; +} +#endif + static inline __ATTRS_o_ai int vec_any_lt(vector double __a, vector double __b) { int __cc; @@ -4671,7 +5399,16 @@ vec_any_lt(vector double __a, vector double __b) { /*-- vec_any_nge ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_nge(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__a, __b, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int vec_any_nge(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); @@ -4680,7 +5417,16 @@ vec_any_nge(vector double __a, vector double __b) { /*-- vec_any_ngt ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_ngt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__a, __b, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int vec_any_ngt(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); @@ -4689,7 +5435,16 @@ vec_any_ngt(vector double __a, vector double __b) { /*-- vec_any_nle ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_nle(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchesbs(__b, __a, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int vec_any_nle(vector double __a, vector double __b) { int __cc; 
__builtin_s390_vfchedbs(__b, __a, &__cc); @@ -4698,7 +5453,16 @@ vec_any_nle(vector double __a, vector double __b) { /*-- vec_any_nlt ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_nlt(vector float __a, vector float __b) { + int __cc; + __builtin_s390_vfchsbs(__b, __a, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int vec_any_nlt(vector double __a, vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); @@ -4707,7 +5471,16 @@ vec_any_nlt(vector double __a, vector double __b) { /*-- vec_any_nan ------------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_nan(vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc != 3; +} +#endif + +static inline __ATTRS_o_ai int vec_any_nan(vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); @@ -4716,7 +5489,16 @@ vec_any_nan(vector double __a) { /*-- vec_any_numeric --------------------------------------------------------*/ -static inline __ATTRS_ai int +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_numeric(vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int vec_any_numeric(vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); @@ -4735,11 +5517,13 @@ vec_andc(vector signed char __a, vector signed char __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_andc(vector bool char __a, vector signed char __b) { return __a & ~__b; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed char vec_andc(vector signed char __a, vector bool char __b) { return __a & ~__b; @@ -4750,11 +5534,13 @@ vec_andc(vector unsigned char __a, vector unsigned char __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_andc(vector bool char __a, vector unsigned char __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_andc(vector unsigned char __a, vector bool char __b) { return __a & ~__b; @@ -4770,11 +5556,13 @@ vec_andc(vector signed short __a, vector signed short __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_andc(vector bool short __a, vector signed short __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_andc(vector signed short __a, vector bool short __b) { return __a & ~__b; @@ -4785,11 +5573,13 @@ vec_andc(vector unsigned short __a, vector unsigned short __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_andc(vector bool short __a, vector unsigned short __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_andc(vector unsigned short __a, vector bool short __b) { return __a & ~__b; @@ -4805,11 +5595,13 @@ vec_andc(vector signed int __a, vector signed int __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_andc(vector bool int __a, vector signed int __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_andc(vector signed int __a, vector bool int __b) { return __a & ~__b; @@ -4820,11 +5612,13 @@ vec_andc(vector unsigned int __a, vector unsigned int __b) { return __a & ~__b; } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned int vec_andc(vector bool int __a, vector unsigned int __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_andc(vector unsigned int __a, vector bool int __b) { return __a & ~__b; @@ -4840,11 +5634,13 @@ vec_andc(vector signed long long __a, vector signed long long __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_andc(vector bool long long __a, vector signed long long __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_andc(vector signed long long __a, vector bool long long __b) { return __a & ~__b; @@ -4855,28 +5651,40 @@ vec_andc(vector unsigned long long __a, vector unsigned long long __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_andc(vector bool long long __a, vector unsigned long long __b) { return __a & ~__b; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_andc(vector unsigned long long __a, vector bool long long __b) { return __a & ~__b; } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_andc(vector float __a, vector float __b) { + return (vector float)((vector unsigned int)__a & + ~(vector unsigned int)__b); +} +#endif + static inline __ATTRS_o_ai vector double vec_andc(vector double __a, vector double __b) { return (vector double)((vector unsigned long long)__a & ~(vector unsigned long long)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_andc(vector bool long long __a, vector double __b) { return (vector double)((vector unsigned long long)__a & ~(vector unsigned long long)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector double vec_andc(vector double __a, vector bool long long __b) { return (vector double)((vector unsigned long long)__a & @@ -4895,11 +5703,13 @@ vec_nor(vector signed char __a, vector signed char __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_nor(vector bool char __a, vector signed char __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_nor(vector signed char __a, vector bool char __b) { return ~(__a | __b); @@ -4910,11 +5720,13 @@ vec_nor(vector unsigned char __a, vector unsigned char __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_nor(vector bool char __a, vector unsigned char __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_nor(vector unsigned char __a, vector bool char __b) { return ~(__a | __b); @@ -4930,11 +5742,13 @@ vec_nor(vector signed short __a, vector signed short __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_nor(vector bool short __a, vector signed short __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_nor(vector signed short __a, vector bool short __b) { return ~(__a | __b); @@ -4945,11 +5759,13 @@ vec_nor(vector unsigned short __a, vector unsigned short __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_nor(vector bool short __a, vector unsigned short __b) { return ~(__a | __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned short vec_nor(vector unsigned short __a, vector bool short __b) { return ~(__a | __b); @@ -4965,11 +5781,13 @@ vec_nor(vector signed int __a, vector signed int __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_nor(vector bool int __a, vector signed int __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_nor(vector signed int __a, vector bool int __b) { return ~(__a | __b); @@ -4980,11 +5798,13 @@ vec_nor(vector unsigned int __a, vector unsigned int __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_nor(vector bool int __a, vector unsigned int __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_nor(vector unsigned int __a, vector bool int __b) { return ~(__a | __b); @@ -5000,11 +5820,13 @@ vec_nor(vector signed long long __a, vector signed long long __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_nor(vector bool long long __a, vector signed long long __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_nor(vector signed long long __a, vector bool long long __b) { return ~(__a | __b); @@ -5015,34 +5837,274 @@ vec_nor(vector unsigned long long __a, vector unsigned long long __b) { return ~(__a | __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_nor(vector bool long long __a, vector unsigned long long __b) { return ~(__a | __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_nor(vector unsigned long long __a, vector bool long long __b) { return ~(__a | __b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_nor(vector float __a, vector float __b) { + return (vector float)~((vector unsigned int)__a | + (vector unsigned int)__b); +} +#endif + static inline __ATTRS_o_ai vector double vec_nor(vector double __a, vector double __b) { return (vector double)~((vector unsigned long long)__a | (vector unsigned long long)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_nor(vector bool long long __a, vector double __b) { return (vector double)~((vector unsigned long long)__a | (vector unsigned long long)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_nor(vector double __a, vector bool long long __b) { return (vector double)~((vector unsigned long long)__a | (vector unsigned long long)__b); } +/*-- vec_orc ----------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool char +vec_orc(vector bool char __a, vector bool char __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector signed char +vec_orc(vector signed char __a, vector signed char __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_orc(vector unsigned char __a, vector unsigned char __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector bool short +vec_orc(vector bool short __a, vector bool short __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector signed short +vec_orc(vector signed short __a, vector signed short __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_orc(vector unsigned short __a, vector unsigned short __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector bool int +vec_orc(vector bool int __a, vector bool int __b) { + return __a | ~__b; +} + 
+static inline __ATTRS_o_ai vector signed int +vec_orc(vector signed int __a, vector signed int __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_orc(vector unsigned int __a, vector unsigned int __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector bool long long +vec_orc(vector bool long long __a, vector bool long long __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector signed long long +vec_orc(vector signed long long __a, vector signed long long __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_orc(vector unsigned long long __a, vector unsigned long long __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai vector float +vec_orc(vector float __a, vector float __b) { + return (vector float)((vector unsigned int)__a & + ~(vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector double +vec_orc(vector double __a, vector double __b) { + return (vector double)((vector unsigned long long)__a & + ~(vector unsigned long long)__b); +} +#endif + +/*-- vec_nand ---------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool char +vec_nand(vector bool char __a, vector bool char __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_nand(vector signed char __a, vector signed char __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_nand(vector unsigned char __a, vector unsigned char __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_nand(vector bool short __a, vector bool short __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_nand(vector signed short __a, vector signed short __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_nand(vector unsigned short __a, vector unsigned short __b) { + return ~(__a & __b); +} + 
+static inline __ATTRS_o_ai vector bool int +vec_nand(vector bool int __a, vector bool int __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_nand(vector signed int __a, vector signed int __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_nand(vector unsigned int __a, vector unsigned int __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_nand(vector bool long long __a, vector bool long long __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_nand(vector signed long long __a, vector signed long long __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_nand(vector unsigned long long __a, vector unsigned long long __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai vector float +vec_nand(vector float __a, vector float __b) { + return (vector float)~((vector unsigned int)__a & + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector double +vec_nand(vector double __a, vector double __b) { + return (vector double)~((vector unsigned long long)__a & + (vector unsigned long long)__b); +} +#endif + +/*-- vec_eqv ----------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector bool char +vec_eqv(vector bool char __a, vector bool char __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector signed char +vec_eqv(vector signed char __a, vector signed char __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_eqv(vector unsigned char __a, vector unsigned char __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector bool short +vec_eqv(vector bool short __a, vector bool short __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector signed short +vec_eqv(vector signed short __a, vector signed short __b) { + return ~(__a ^ __b); +} + 
+static inline __ATTRS_o_ai vector unsigned short +vec_eqv(vector unsigned short __a, vector unsigned short __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector bool int +vec_eqv(vector bool int __a, vector bool int __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector signed int +vec_eqv(vector signed int __a, vector signed int __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_eqv(vector unsigned int __a, vector unsigned int __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector bool long long +vec_eqv(vector bool long long __a, vector bool long long __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector signed long long +vec_eqv(vector signed long long __a, vector signed long long __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_eqv(vector unsigned long long __a, vector unsigned long long __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai vector float +vec_eqv(vector float __a, vector float __b) { + return (vector float)~((vector unsigned int)__a ^ + (vector unsigned int)__b); +} + +static inline __ATTRS_o_ai vector double +vec_eqv(vector double __a, vector double __b) { + return (vector double)~((vector unsigned long long)__a ^ + (vector unsigned long long)__b); +} +#endif + /*-- vec_cntlz --------------------------------------------------------------*/ static inline __ATTRS_o_ai vector unsigned char @@ -5323,30 +6385,35 @@ vec_sll(vector signed char __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_sll(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed char vec_sll(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_sll(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_s390_vsl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_sll(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_sll(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_s390_vsl( @@ -5358,11 +6425,13 @@ vec_sll(vector unsigned char __a, vector unsigned char __b) { return __builtin_s390_vsl(__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_sll(vector unsigned char __a, vector unsigned short __b) { return __builtin_s390_vsl(__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_sll(vector unsigned char __a, vector unsigned int __b) { return __builtin_s390_vsl(__a, (vector unsigned char)__b); @@ -5374,30 +6443,35 @@ vec_sll(vector signed short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_sll(vector signed short __a, vector unsigned short __b) { return (vector signed short)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed short vec_sll(vector signed short __a, vector unsigned int __b) { return (vector signed short)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_sll(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_s390_vsl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_sll(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_sll(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_s390_vsl( @@ -5410,12 +6484,14 @@ vec_sll(vector unsigned short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_sll(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_sll(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_s390_vsl( @@ -5428,30 +6504,35 @@ vec_sll(vector signed int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_sll(vector signed int __a, vector unsigned short __b) { return (vector signed int)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed int vec_sll(vector signed int __a, vector unsigned int __b) { return (vector signed int)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_sll(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_s390_vsl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_sll(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_sll(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_s390_vsl( @@ -5464,12 +6545,14 @@ vec_sll(vector unsigned int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_sll(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_sll(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_s390_vsl( @@ -5482,30 +6565,35 @@ vec_sll(vector signed long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_sll(vector signed long long __a, vector unsigned short __b) { return (vector signed long long)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed long long vec_sll(vector signed long long __a, vector unsigned int __b) { return (vector signed long long)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_sll(vector bool long long __a, vector unsigned char __b) { return (vector bool long long)__builtin_s390_vsl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_sll(vector bool long long __a, vector unsigned short __b) { return (vector bool long long)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_sll(vector bool long long __a, vector unsigned int __b) { return (vector bool long long)__builtin_s390_vsl( @@ -5518,12 +6606,14 @@ vec_sll(vector unsigned long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_sll(vector unsigned long long __a, vector unsigned short __b) { return (vector unsigned long long)__builtin_s390_vsl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_sll(vector unsigned long long __a, vector unsigned int __b) { return (vector unsigned long long)__builtin_s390_vsl( @@ -5626,6 +6716,20 @@ vec_slb(vector unsigned long long __a, vector unsigned long long __b) { (vector unsigned char)__a, (vector unsigned char)__b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_slb(vector float __a, vector signed int __b) { + return (vector float)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector float +vec_slb(vector float __a, vector unsigned int __b) { + return (vector float)__builtin_s390_vslb( + (vector unsigned char)__a, (vector unsigned char)__b); +} +#endif + static inline __ATTRS_o_ai vector double vec_slb(vector double __a, vector signed long long __b) { return (vector double)__builtin_s390_vslb( @@ -5644,6 +6748,10 @@ extern __ATTRS_o vector signed char vec_sld(vector signed char __a, vector signed char __b, int __c) __constant_range(__c, 0, 15); +extern __ATTRS_o vector bool char +vec_sld(vector bool char __a, vector bool char __b, int __c) + __constant_range(__c, 0, 15); + extern __ATTRS_o vector unsigned char vec_sld(vector unsigned char __a, vector unsigned char __b, int __c) __constant_range(__c, 0, 15); @@ -5652,6 +6760,10 @@ extern __ATTRS_o vector signed short vec_sld(vector signed short __a, vector signed short __b, int __c) __constant_range(__c, 0, 15); +extern __ATTRS_o vector bool short +vec_sld(vector bool short __a, vector bool short __b, int __c) + __constant_range(__c, 0, 15); + extern __ATTRS_o vector unsigned short vec_sld(vector unsigned short __a, vector unsigned short __b, int __c) __constant_range(__c, 0, 15); @@ -5660,6 +6772,10 @@ extern __ATTRS_o vector signed int vec_sld(vector signed int __a, vector signed int __b, int __c) __constant_range(__c, 0, 15); +extern __ATTRS_o vector bool int +vec_sld(vector bool int __a, vector bool int __b, int __c) + 
__constant_range(__c, 0, 15); + extern __ATTRS_o vector unsigned int vec_sld(vector unsigned int __a, vector unsigned int __b, int __c) __constant_range(__c, 0, 15); @@ -5668,10 +6784,20 @@ extern __ATTRS_o vector signed long long vec_sld(vector signed long long __a, vector signed long long __b, int __c) __constant_range(__c, 0, 15); +extern __ATTRS_o vector bool long long +vec_sld(vector bool long long __a, vector bool long long __b, int __c) + __constant_range(__c, 0, 15); + extern __ATTRS_o vector unsigned long long vec_sld(vector unsigned long long __a, vector unsigned long long __b, int __c) __constant_range(__c, 0, 15); +#if __ARCH__ >= 12 +extern __ATTRS_o vector float +vec_sld(vector float __a, vector float __b, int __c) + __constant_range(__c, 0, 15); +#endif + extern __ATTRS_o vector double vec_sld(vector double __a, vector double __b, int __c) __constant_range(__c, 0, 15); @@ -5714,6 +6840,7 @@ extern __ATTRS_o vector unsigned long long vec_sldw(vector unsigned long long __a, vector unsigned long long __b, int __c) __constant_range(__c, 0, 3); +// This prototype is deprecated. extern __ATTRS_o vector double vec_sldw(vector double __a, vector double __b, int __c) __constant_range(__c, 0, 3); @@ -5730,30 +6857,35 @@ vec_sral(vector signed char __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_sral(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_sral(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool char vec_sral(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_s390_vsra( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_sral(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_sral(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_s390_vsra( @@ -5765,11 +6897,13 @@ vec_sral(vector unsigned char __a, vector unsigned char __b) { return __builtin_s390_vsra(__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_sral(vector unsigned char __a, vector unsigned short __b) { return __builtin_s390_vsra(__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_sral(vector unsigned char __a, vector unsigned int __b) { return __builtin_s390_vsra(__a, (vector unsigned char)__b); @@ -5781,30 +6915,35 @@ vec_sral(vector signed short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_sral(vector signed short __a, vector unsigned short __b) { return (vector signed short)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_sral(vector signed short __a, vector unsigned int __b) { return (vector signed short)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool short vec_sral(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_s390_vsra( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_sral(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_sral(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_s390_vsra( @@ -5817,12 +6956,14 @@ vec_sral(vector unsigned short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_sral(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_sral(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_s390_vsra( @@ -5835,30 +6976,35 @@ vec_sral(vector signed int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_sral(vector signed int __a, vector unsigned short __b) { return (vector signed int)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_sral(vector signed int __a, vector unsigned int __b) { return (vector signed int)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool int vec_sral(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_s390_vsra( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_sral(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_sral(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_s390_vsra( @@ -5871,12 +7017,14 @@ vec_sral(vector unsigned int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_sral(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_sral(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_s390_vsra( @@ -5889,30 +7037,35 @@ vec_sral(vector signed long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_sral(vector signed long long __a, vector unsigned short __b) { return (vector signed long long)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_sral(vector signed long long __a, vector unsigned int __b) { return (vector signed long long)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool long long vec_sral(vector bool long long __a, vector unsigned char __b) { return (vector bool long long)__builtin_s390_vsra( (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_sral(vector bool long long __a, vector unsigned short __b) { return (vector bool long long)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_sral(vector bool long long __a, vector unsigned int __b) { return (vector bool long long)__builtin_s390_vsra( @@ -5925,12 +7078,14 @@ vec_sral(vector unsigned long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_sral(vector unsigned long long __a, vector unsigned short __b) { return (vector unsigned long long)__builtin_s390_vsra( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_sral(vector unsigned long long __a, vector unsigned int __b) { return (vector unsigned long long)__builtin_s390_vsra( @@ -6033,6 +7188,20 @@ vec_srab(vector unsigned long long __a, vector unsigned long long __b) { (vector unsigned char)__a, (vector unsigned char)__b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_srab(vector float __a, vector signed int __b) { + return (vector float)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector float +vec_srab(vector float __a, vector unsigned int __b) { + return (vector float)__builtin_s390_vsrab( + (vector unsigned char)__a, (vector unsigned char)__b); +} +#endif + static inline __ATTRS_o_ai vector double vec_srab(vector double __a, vector signed long long __b) { return (vector double)__builtin_s390_vsrab( @@ -6053,30 +7222,35 @@ vec_srl(vector signed char __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_srl(vector signed char __a, vector unsigned short __b) { return (vector signed char)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_srl(vector signed char __a, vector unsigned int __b) { return (vector signed char)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_srl(vector bool char __a, vector unsigned char __b) { return (vector bool char)__builtin_s390_vsrl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool char vec_srl(vector bool char __a, vector unsigned short __b) { return (vector bool char)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool char vec_srl(vector bool char __a, vector unsigned int __b) { return (vector bool char)__builtin_s390_vsrl( @@ -6088,11 +7262,13 @@ vec_srl(vector unsigned char __a, vector unsigned char __b) { return __builtin_s390_vsrl(__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_srl(vector unsigned char __a, vector unsigned short __b) { return __builtin_s390_vsrl(__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_srl(vector unsigned char __a, vector unsigned int __b) { return __builtin_s390_vsrl(__a, (vector unsigned char)__b); @@ -6104,30 +7280,35 @@ vec_srl(vector signed short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_srl(vector signed short __a, vector unsigned short __b) { return (vector signed short)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_srl(vector signed short __a, vector unsigned int __b) { return (vector signed short)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_srl(vector bool short __a, vector unsigned char __b) { return (vector bool short)__builtin_s390_vsrl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool short vec_srl(vector bool short __a, vector unsigned short __b) { return (vector bool short)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool short vec_srl(vector bool short __a, vector unsigned int __b) { return (vector bool short)__builtin_s390_vsrl( @@ -6140,12 +7321,14 @@ vec_srl(vector unsigned short __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_srl(vector unsigned short __a, vector unsigned short __b) { return (vector unsigned short)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_srl(vector unsigned short __a, vector unsigned int __b) { return (vector unsigned short)__builtin_s390_vsrl( @@ -6158,30 +7341,35 @@ vec_srl(vector signed int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_srl(vector signed int __a, vector unsigned short __b) { return (vector signed int)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_srl(vector signed int __a, vector unsigned int __b) { return (vector signed int)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_srl(vector bool int __a, vector unsigned char __b) { return (vector bool int)__builtin_s390_vsrl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool int vec_srl(vector bool int __a, vector unsigned short __b) { return (vector bool int)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool int vec_srl(vector bool int __a, vector unsigned int __b) { return (vector bool int)__builtin_s390_vsrl( @@ -6194,12 +7382,14 @@ vec_srl(vector unsigned int __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_srl(vector unsigned int __a, vector unsigned short __b) { return (vector unsigned int)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_srl(vector unsigned int __a, vector unsigned int __b) { return (vector unsigned int)__builtin_s390_vsrl( @@ -6212,30 +7402,35 @@ vec_srl(vector signed long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_srl(vector signed long long __a, vector unsigned short __b) { return (vector signed long long)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_srl(vector signed long long __a, vector unsigned int __b) { return (vector signed long long)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_srl(vector bool long long __a, vector unsigned char __b) { return (vector bool long long)__builtin_s390_vsrl( (vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector bool long long vec_srl(vector bool long long __a, vector unsigned short __b) { return (vector bool long long)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector bool long long vec_srl(vector bool long long __a, vector unsigned int __b) { return (vector bool long long)__builtin_s390_vsrl( @@ -6248,12 +7443,14 @@ vec_srl(vector unsigned long long __a, vector unsigned char __b) { (vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_srl(vector unsigned long long __a, vector unsigned short __b) { return (vector unsigned long long)__builtin_s390_vsrl( (vector unsigned char)__a, (vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_srl(vector unsigned long long __a, vector unsigned int __b) { return (vector unsigned long long)__builtin_s390_vsrl( @@ -6356,6 +7553,20 @@ vec_srb(vector unsigned long long __a, vector unsigned long long __b) { (vector unsigned char)__a, (vector unsigned char)__b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_srb(vector float __a, vector signed int __b) { + return (vector float)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} + +static inline __ATTRS_o_ai vector float +vec_srb(vector float __a, vector unsigned int __b) { + return (vector float)__builtin_s390_vsrlb( + (vector unsigned char)__a, (vector unsigned char)__b); +} +#endif + static inline __ATTRS_o_ai vector double vec_srb(vector double __a, vector signed long long __b) { return (vector double)__builtin_s390_vsrlb( @@ -6390,6 +7601,13 @@ vec_abs(vector signed long long __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed long long)0)); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_abs(vector float __a) { + return 
__builtin_s390_vflpsb(__a); +} +#endif + static inline __ATTRS_o_ai vector double vec_abs(vector double __a) { return __builtin_s390_vflpdb(__a); @@ -6397,7 +7615,14 @@ vec_abs(vector double __a) { /*-- vec_nabs ---------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_nabs(vector float __a) { + return __builtin_s390_vflnsb(__a); +} +#endif + +static inline __ATTRS_o_ai vector double vec_nabs(vector double __a) { return __builtin_s390_vflndb(__a); } @@ -6409,12 +7634,14 @@ vec_max(vector signed char __a, vector signed char __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_max(vector signed char __a, vector bool char __b) { vector signed char __bc = (vector signed char)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_max(vector bool char __a, vector signed char __b) { vector signed char __ac = (vector signed char)__a; @@ -6426,12 +7653,14 @@ vec_max(vector unsigned char __a, vector unsigned char __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_max(vector unsigned char __a, vector bool char __b) { vector unsigned char __bc = (vector unsigned char)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_max(vector bool char __a, vector unsigned char __b) { vector unsigned char __ac = (vector unsigned char)__a; @@ -6443,12 +7672,14 @@ vec_max(vector signed short __a, vector signed short __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed short vec_max(vector signed short __a, vector bool short __b) { vector signed short __bc = (vector signed short)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_max(vector bool short __a, vector signed short __b) { vector signed short __ac = (vector signed short)__a; @@ -6460,12 +7691,14 @@ vec_max(vector unsigned short __a, vector unsigned short __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_max(vector unsigned short __a, vector bool short __b) { vector unsigned short __bc = (vector unsigned short)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_max(vector bool short __a, vector unsigned short __b) { vector unsigned short __ac = (vector unsigned short)__a; @@ -6477,12 +7710,14 @@ vec_max(vector signed int __a, vector signed int __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_max(vector signed int __a, vector bool int __b) { vector signed int __bc = (vector signed int)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_max(vector bool int __a, vector signed int __b) { vector signed int __ac = (vector signed int)__a; @@ -6494,12 +7729,14 @@ vec_max(vector unsigned int __a, vector unsigned int __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_max(vector unsigned int __a, vector bool int __b) { vector unsigned int __bc = (vector unsigned int)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned int vec_max(vector bool int __a, vector unsigned int __b) { vector unsigned int __ac = (vector unsigned int)__a; @@ -6511,12 +7748,14 @@ vec_max(vector signed long long __a, vector signed long long __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_max(vector signed long long __a, vector bool long long __b) { vector signed long long __bc = (vector signed long long)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_max(vector bool long long __a, vector signed long long __b) { vector signed long long __ac = (vector signed long long)__a; @@ -6528,21 +7767,34 @@ vec_max(vector unsigned long long __a, vector unsigned long long __b) { return vec_sel(__b, __a, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_max(vector unsigned long long __a, vector bool long long __b) { vector unsigned long long __bc = (vector unsigned long long)__b; return vec_sel(__bc, __a, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_max(vector bool long long __a, vector unsigned long long __b) { vector unsigned long long __ac = (vector unsigned long long)__a; return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_max(vector float __a, vector float __b) { + return __builtin_s390_vfmaxsb(__a, __b, 0); +} +#endif + static inline __ATTRS_o_ai vector double vec_max(vector double __a, vector double __b) { +#if __ARCH__ >= 12 + return __builtin_s390_vfmaxdb(__a, __b, 0); +#else return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +#endif } /*-- vec_min ----------------------------------------------------------------*/ @@ -6552,12 +7804,14 @@ vec_min(vector signed char __a, vector signed char __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_min(vector signed char __a, vector bool char __b) { vector signed char __bc = (vector signed char)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed char vec_min(vector bool char __a, vector signed char __b) { vector signed char __ac = (vector signed char)__a; @@ -6569,12 +7823,14 @@ vec_min(vector unsigned char __a, vector unsigned char __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned char vec_min(vector unsigned char __a, vector bool char __b) { vector unsigned char __bc = (vector unsigned char)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned char vec_min(vector bool char __a, vector unsigned char __b) { vector unsigned char __ac = (vector unsigned char)__a; @@ -6586,12 +7842,14 @@ vec_min(vector signed short __a, vector signed short __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_min(vector signed short __a, vector bool short __b) { vector signed short __bc = (vector signed short)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed short vec_min(vector bool short __a, vector signed short __b) { vector signed short __ac = (vector signed short)__a; @@ -6603,12 +7861,14 @@ vec_min(vector unsigned short __a, vector unsigned short __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_min(vector unsigned short __a, vector bool short __b) { vector unsigned short __bc = (vector unsigned short)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned short vec_min(vector bool short __a, vector unsigned short __b) { vector unsigned short __ac = (vector unsigned short)__a; @@ -6620,12 +7880,14 @@ vec_min(vector signed int __a, vector signed int __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed int vec_min(vector signed int __a, vector bool int __b) { vector signed int __bc = (vector signed int)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector signed int vec_min(vector bool int __a, vector signed int __b) { vector signed int __ac = (vector signed int)__a; @@ -6637,12 +7899,14 @@ vec_min(vector unsigned int __a, vector unsigned int __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_min(vector unsigned int __a, vector bool int __b) { vector unsigned int __bc = (vector unsigned int)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned int vec_min(vector bool int __a, vector unsigned int __b) { vector unsigned int __ac = (vector unsigned int)__a; @@ -6654,12 +7918,14 @@ vec_min(vector signed long long __a, vector signed long long __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_min(vector signed long long __a, vector bool long long __b) { vector signed long long __bc = (vector signed long long)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_min(vector bool long long __a, vector signed long long __b) { vector signed long long __ac = (vector signed long long)__a; @@ -6671,21 +7937,34 @@ vec_min(vector unsigned long long __a, vector unsigned long long __b) { return vec_sel(__a, __b, vec_cmpgt(__a, __b)); } +// This prototype is deprecated. static inline __ATTRS_o_ai vector unsigned long long vec_min(vector unsigned long long __a, vector bool long long __b) { vector unsigned long long __bc = (vector unsigned long long)__b; return vec_sel(__a, __bc, vec_cmpgt(__a, __bc)); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_min(vector bool long long __a, vector unsigned long long __b) { vector unsigned long long __ac = (vector unsigned long long)__a; return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_min(vector float __a, vector float __b) { + return __builtin_s390_vfminsb(__a, __b, 0); +} +#endif + static inline __ATTRS_o_ai vector double vec_min(vector double __a, vector double __b) { +#if __ARCH__ >= 12 + return __builtin_s390_vfmindb(__a, __b, 0); +#else return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +#endif } /*-- vec_add_u128 -----------------------------------------------------------*/ @@ -7126,6 +8405,13 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) { return __builtin_s390_vmlof(__a, __b); } +/*-- vec_msum_u128 ----------------------------------------------------------*/ + +#if __ARCH__ >= 12 +#define vec_msum_u128(X, Y, Z, W) \ + ((vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W))); +#endif + /*-- vec_sub_u128 -----------------------------------------------------------*/ static inline __ATTRS_ai vector unsigned char @@ -7263,6 +8549,14 @@ vec_test_mask(vector unsigned long long __a, vector unsigned long long __b) { (vector unsigned char)__b); } +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_test_mask(vector float __a, vector unsigned int __b) { + return __builtin_s390_vtm((vector unsigned char)__a, + (vector unsigned char)__b); +} +#endif + static inline __ATTRS_o_ai int vec_test_mask(vector double __a, vector unsigned long long __b) { return __builtin_s390_vtm((vector unsigned char)__a, @@ -7271,27 +8565,77 @@ vec_test_mask(vector double __a, vector unsigned long long __b) { /*-- vec_madd ---------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_madd(vector float __a, vector float __b, vector float __c) { 
+ return __builtin_s390_vfmasb(__a, __b, __c); +} +#endif + +static inline __ATTRS_o_ai vector double vec_madd(vector double __a, vector double __b, vector double __c) { return __builtin_s390_vfmadb(__a, __b, __c); } /*-- vec_msub ---------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_msub(vector float __a, vector float __b, vector float __c) { + return __builtin_s390_vfmssb(__a, __b, __c); +} +#endif + +static inline __ATTRS_o_ai vector double vec_msub(vector double __a, vector double __b, vector double __c) { return __builtin_s390_vfmsdb(__a, __b, __c); } +/*-- vec_nmadd ---------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_nmadd(vector float __a, vector float __b, vector float __c) { + return __builtin_s390_vfnmasb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector double +vec_nmadd(vector double __a, vector double __b, vector double __c) { + return __builtin_s390_vfnmadb(__a, __b, __c); +} +#endif + +/*-- vec_nmsub ---------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_nmsub(vector float __a, vector float __b, vector float __c) { + return __builtin_s390_vfnmssb(__a, __b, __c); +} + +static inline __ATTRS_o_ai vector double +vec_nmsub(vector double __a, vector double __b, vector double __c) { + return __builtin_s390_vfnmsdb(__a, __b, __c); +} +#endif + /*-- vec_sqrt ---------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_sqrt(vector float __a) { + return __builtin_s390_vfsqsb(__a); +} +#endif + +static inline __ATTRS_o_ai vector double vec_sqrt(vector double __a) { return __builtin_s390_vfsqdb(__a); } /*-- vec_ld2f 
---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai vector double vec_ld2f(const float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); @@ -7300,6 +8644,7 @@ vec_ld2f(const float *__ptr) { /*-- vec_st2f ---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai void vec_st2f(vector double __a, float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); @@ -7308,6 +8653,7 @@ vec_st2f(vector double __a, float *__ptr) { /*-- vec_ctd ----------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_ctd(vector signed long long __a, int __b) __constant_range(__b, 0, 31) { @@ -7316,6 +8662,7 @@ vec_ctd(vector signed long long __a, int __b) return __conv; } +// This prototype is deprecated. static inline __ATTRS_o_ai vector double vec_ctd(vector unsigned long long __a, int __b) __constant_range(__b, 0, 31) { @@ -7326,6 +8673,7 @@ vec_ctd(vector unsigned long long __a, int __b) /*-- vec_ctsl ---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai vector signed long long vec_ctsl(vector double __a, int __b) __constant_range(__b, 0, 31) { @@ -7335,6 +8683,7 @@ vec_ctsl(vector double __a, int __b) /*-- vec_ctul ---------------------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_o_ai vector unsigned long long vec_ctul(vector double __a, int __b) __constant_range(__b, 0, 31) { @@ -7342,16 +8691,79 @@ vec_ctul(vector double __a, int __b) return __builtin_convertvector(__a, vector unsigned long long); } +/*-- vec_doublee ------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai vector double +vec_doublee(vector float __a) { + typedef float __v2f32 __attribute__((__vector_size__(8))); + __v2f32 __pack = __builtin_shufflevector(__a, __a, 0, 2); + return __builtin_convertvector(__pack, vector double); +} +#endif + +/*-- vec_floate -------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai vector float +vec_floate(vector double __a) { + typedef float __v2f32 __attribute__((__vector_size__(8))); + __v2f32 __pack = __builtin_convertvector(__a, __v2f32); + return __builtin_shufflevector(__pack, __pack, 0, -1, 1, -1); +} +#endif + +/*-- vec_double -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector double +vec_double(vector signed long long __a) { + return __builtin_convertvector(__a, vector double); +} + +static inline __ATTRS_o_ai vector double +vec_double(vector unsigned long long __a) { + return __builtin_convertvector(__a, vector double); +} + +/*-- vec_signed -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed long long +vec_signed(vector double __a) { + return __builtin_convertvector(__a, vector signed long long); +} + +/*-- vec_unsigned -----------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector unsigned long long +vec_unsigned(vector double __a) { + return __builtin_convertvector(__a, vector unsigned long long); +} + /*-- vec_roundp -------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline 
__ATTRS_o_ai vector float +vec_roundp(vector float __a) { + return __builtin_s390_vfisb(__a, 4, 6); +} +#endif + +static inline __ATTRS_o_ai vector double vec_roundp(vector double __a) { return __builtin_s390_vfidb(__a, 4, 6); } /*-- vec_ceil ---------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_ceil(vector float __a) { + // On this platform, vec_ceil never triggers the IEEE-inexact exception. + return __builtin_s390_vfisb(__a, 4, 6); +} +#endif + +static inline __ATTRS_o_ai vector double vec_ceil(vector double __a) { // On this platform, vec_ceil never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 6); @@ -7359,14 +8771,29 @@ vec_ceil(vector double __a) { /*-- vec_roundm -------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_roundm(vector float __a) { + return __builtin_s390_vfisb(__a, 4, 7); +} +#endif + +static inline __ATTRS_o_ai vector double vec_roundm(vector double __a) { return __builtin_s390_vfidb(__a, 4, 7); } /*-- vec_floor --------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_floor(vector float __a) { + // On this platform, vec_floor never triggers the IEEE-inexact exception. + return __builtin_s390_vfisb(__a, 4, 7); +} +#endif + +static inline __ATTRS_o_ai vector double vec_floor(vector double __a) { // On this platform, vec_floor never triggers the IEEE-inexact exception. 
return __builtin_s390_vfidb(__a, 4, 7); @@ -7374,14 +8801,29 @@ vec_floor(vector double __a) { /*-- vec_roundz -------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_roundz(vector float __a) { + return __builtin_s390_vfisb(__a, 4, 5); +} +#endif + +static inline __ATTRS_o_ai vector double vec_roundz(vector double __a) { return __builtin_s390_vfidb(__a, 4, 5); } /*-- vec_trunc --------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_trunc(vector float __a) { + // On this platform, vec_trunc never triggers the IEEE-inexact exception. + return __builtin_s390_vfisb(__a, 4, 5); +} +#endif + +static inline __ATTRS_o_ai vector double vec_trunc(vector double __a) { // On this platform, vec_trunc never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 5); @@ -7389,22 +8831,104 @@ vec_trunc(vector double __a) { /*-- vec_roundc -------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_roundc(vector float __a) { + return __builtin_s390_vfisb(__a, 4, 0); +} +#endif + +static inline __ATTRS_o_ai vector double vec_roundc(vector double __a) { return __builtin_s390_vfidb(__a, 4, 0); } +/*-- vec_rint ---------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_rint(vector float __a) { + // vec_rint may trigger the IEEE-inexact exception. + return __builtin_s390_vfisb(__a, 0, 0); +} +#endif + +static inline __ATTRS_o_ai vector double +vec_rint(vector double __a) { + // vec_rint may trigger the IEEE-inexact exception. 
+ return __builtin_s390_vfidb(__a, 0, 0); +} + /*-- vec_round --------------------------------------------------------------*/ -static inline __ATTRS_ai vector double +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_round(vector float __a) { + return __builtin_s390_vfisb(__a, 4, 4); +} +#endif + +static inline __ATTRS_o_ai vector double vec_round(vector double __a) { return __builtin_s390_vfidb(__a, 4, 4); } /*-- vec_fp_test_data_class -------------------------------------------------*/ +#if __ARCH__ >= 12 +extern __ATTRS_o vector bool int +vec_fp_test_data_class(vector float __a, int __b, int *__c) + __constant_range(__b, 0, 4095); + +extern __ATTRS_o vector bool long long +vec_fp_test_data_class(vector double __a, int __b, int *__c) + __constant_range(__b, 0, 4095); + +#define vec_fp_test_data_class(X, Y, Z) \ + ((__typeof__((vec_fp_test_data_class)((X), (Y), (Z)))) \ + __extension__ ({ \ + vector unsigned char __res; \ + vector unsigned char __x = (vector unsigned char)(X); \ + int *__z = (Z); \ + switch (sizeof ((X)[0])) { \ + case 4: __res = (vector unsigned char) \ + __builtin_s390_vftcisb((vector float)__x, (Y), __z); \ + break; \ + default: __res = (vector unsigned char) \ + __builtin_s390_vftcidb((vector double)__x, (Y), __z); \ + break; \ + } __res; })) +#else #define vec_fp_test_data_class(X, Y, Z) \ ((vector bool long long)__builtin_s390_vftcidb((X), (Y), (Z))) +#endif + +#define __VEC_CLASS_FP_ZERO_P (1 << 11) +#define __VEC_CLASS_FP_ZERO_N (1 << 10) +#define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N) +#define __VEC_CLASS_FP_NORMAL_P (1 << 9) +#define __VEC_CLASS_FP_NORMAL_N (1 << 8) +#define __VEC_CLASS_FP_NORMAL (__VEC_CLASS_FP_NORMAL_P | \ + __VEC_CLASS_FP_NORMAL_N) +#define __VEC_CLASS_FP_SUBNORMAL_P (1 << 7) +#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 6) +#define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \ + __VEC_CLASS_FP_SUBNORMAL_N) +#define __VEC_CLASS_FP_INFINITY_P (1 << 5) +#define 
__VEC_CLASS_FP_INFINITY_N (1 << 4) +#define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \ + __VEC_CLASS_FP_INFINITY_N) +#define __VEC_CLASS_FP_QNAN_P (1 << 3) +#define __VEC_CLASS_FP_QNAN_N (1 << 2) +#define __VEC_CLASS_FP_QNAN (__VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N) +#define __VEC_CLASS_FP_SNAN_P (1 << 1) +#define __VEC_CLASS_FP_SNAN_N (1 << 0) +#define __VEC_CLASS_FP_SNAN (__VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N) +#define __VEC_CLASS_FP_NAN (__VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN) +#define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN | \ + __VEC_CLASS_FP_SUBNORMAL | \ + __VEC_CLASS_FP_ZERO | \ + __VEC_CLASS_FP_INFINITY) /*-- vec_cp_until_zero ------------------------------------------------------*/ diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp index cac24d4b9c4c..84d31200bab4 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp @@ -177,6 +177,18 @@ void index::indexASTUnit(ASTUnit &Unit, DataConsumer->finish(); } +void index::indexTopLevelDecls(ASTContext &Ctx, ArrayRef Decls, + std::shared_ptr DataConsumer, + IndexingOptions Opts) { + IndexingContext IndexCtx(Opts, *DataConsumer); + IndexCtx.setASTContext(Ctx); + + DataConsumer->initialize(Ctx); + for (const Decl *D : Decls) + IndexCtx.indexTopLevelDecl(D); + DataConsumer->finish(); +} + void index::indexModuleFile(serialization::ModuleFile &Mod, ASTReader &Reader, std::shared_ptr DataConsumer, diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp index c4aa51d62f02..addee691e804 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp @@ -260,8 +260,10 @@ static const Decl *adjustParent(const Decl *Parent) { static const Decl *getCanonicalDecl(const Decl *D) { D = D->getCanonicalDecl(); if (auto 
TD = dyn_cast(D)) { - D = TD->getTemplatedDecl(); - assert(D->isCanonicalDecl()); + if (auto TTD = TD->getTemplatedDecl()) { + D = TTD; + assert(D->isCanonicalDecl()); + } } return D; diff --git a/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp b/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp index a201d1659073..f791d8d4bacc 100644 --- a/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/MacroArgs.cpp @@ -52,14 +52,14 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, UnexpArgTokens.size() * sizeof(Token)); // Construct the MacroArgs object. new (Result) - MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumArgs()); + MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams()); } else { Result = *ResultEnt; // Unlink this node from the preprocessors singly linked list. *ResultEnt = Result->ArgCache; Result->NumUnexpArgTokens = UnexpArgTokens.size(); Result->VarargsElided = VarargsElided; - Result->NumMacroArgs = MI->getNumArgs(); + Result->NumMacroArgs = MI->getNumParams(); } // Copy the actual unexpanded tokens to immediately after the result ptr. @@ -148,11 +148,11 @@ bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, const std::vector & MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP) { - assert(Arg < MI->getNumArgs() && "Invalid argument number!"); + assert(Arg < MI->getNumParams() && "Invalid argument number!"); // If we have already computed this, return it. 
- if (PreExpArgTokens.size() < MI->getNumArgs()) - PreExpArgTokens.resize(MI->getNumArgs()); + if (PreExpArgTokens.size() < MI->getNumParams()) + PreExpArgTokens.resize(MI->getNumParams()); std::vector &Result = PreExpArgTokens[Arg]; if (!Result.empty()) return Result; diff --git a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp index 1e5deeb1919b..6dc7841bc160 100644 --- a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp @@ -17,8 +17,8 @@ using namespace clang; MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc), - ArgumentList(nullptr), - NumArguments(0), + ParameterList(nullptr), + NumParameters(0), IsDefinitionLengthCached(false), IsFunctionLike(false), IsC99Varargs(false), @@ -74,7 +74,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, // Check # tokens in replacement, number of args, and various flags all match. if (ReplacementTokens.size() != Other.ReplacementTokens.size() || - getNumArgs() != Other.getNumArgs() || + getNumParams() != Other.getNumParams() || isFunctionLike() != Other.isFunctionLike() || isC99Varargs() != Other.isC99Varargs() || isGNUVarargs() != Other.isGNUVarargs()) @@ -82,7 +82,8 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, if (Lexically) { // Check arguments. - for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); + for (param_iterator I = param_begin(), OI = Other.param_begin(), + E = param_end(); I != E; ++I, ++OI) if (*I != *OI) return false; } @@ -109,10 +110,10 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, return false; // With syntactic equivalence the parameter names can be different as long // as they are used in the same place. 
- int AArgNum = getArgumentNum(A.getIdentifierInfo()); + int AArgNum = getParameterNum(A.getIdentifierInfo()); if (AArgNum == -1) return false; - if (AArgNum != Other.getArgumentNum(B.getIdentifierInfo())) + if (AArgNum != Other.getParameterNum(B.getIdentifierInfo())) return false; continue; } @@ -141,12 +142,12 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const { Out << "\n #define "; if (IsFunctionLike) { Out << "("; - for (unsigned I = 0; I != NumArguments; ++I) { + for (unsigned I = 0; I != NumParameters; ++I) { if (I) Out << ", "; - Out << ArgumentList[I]->getName(); + Out << ParameterList[I]->getName(); } if (IsC99Varargs || IsGNUVarargs) { - if (NumArguments && IsC99Varargs) Out << ", "; + if (NumParameters && IsC99Varargs) Out << ", "; Out << "..."; } Out << ")"; diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp index 8c79e50176e1..b2450f516ba2 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp @@ -220,26 +220,18 @@ bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, return Diag(MacroNameTok, diag::err_pp_missing_macro_name); IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); - if (!II) { - bool Invalid = false; - std::string Spelling = getSpelling(MacroNameTok, &Invalid); - if (Invalid) - return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); - II = getIdentifierInfo(Spelling); - - if (!II->isCPlusPlusOperatorKeyword()) - return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + if (!II) + return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + if (II->isCPlusPlusOperatorKeyword()) { // C++ 2.5p2: Alternative tokens behave the same as its primary token // except for their spellings. Diag(MacroNameTok, getLangOpts().MicrosoftExt ? 
diag::ext_pp_operator_used_as_macro_name : diag::err_pp_operator_used_as_macro_name) << II << MacroNameTok.getKind(); - // Allow #defining |and| and friends for Microsoft compatibility or // recovery when legacy C headers are included in C++. - MacroNameTok.setIdentifierInfo(II); } if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) { @@ -2143,11 +2135,11 @@ void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc, // Preprocessor Macro Directive Handling. //===----------------------------------------------------------------------===// -/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro +/// ReadMacroParameterList - The ( starting an argument list of a macro /// definition has just been read. Lex the rest of the arguments and the /// closing ), updating MI with what we learn. Return true if an error occurs /// parsing the arg list. -bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { +bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { SmallVector Arguments; while (true) { @@ -2181,7 +2173,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { // Add the __VA_ARGS__ identifier as an argument. 
Arguments.push_back(Ident__VA_ARGS__); MI->setIsC99Varargs(); - MI->setArgumentList(Arguments, BP); + MI->setParameterList(Arguments, BP); return false; case tok::eod: // #define X( Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); @@ -2215,7 +2207,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { Diag(Tok, diag::err_pp_expected_comma_in_arg_list); return true; case tok::r_paren: // #define X(A) - MI->setArgumentList(Arguments, BP); + MI->setParameterList(Arguments, BP); return false; case tok::comma: // #define X(A, break; @@ -2231,7 +2223,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { } MI->setIsGNUVarargs(); - MI->setArgumentList(Arguments, BP); + MI->setParameterList(Arguments, BP); return false; } } @@ -2280,28 +2272,20 @@ static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI, MI->getNumTokens() == 0; } -/// HandleDefineDirective - Implements \#define. This consumes the entire macro -/// line then lets the caller lex the next real token. -void Preprocessor::HandleDefineDirective(Token &DefineTok, - bool ImmediatelyAfterHeaderGuard) { - ++NumDefined; +// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the +// entire line) of the macro's tokens and adds them to MacroInfo, and while +// doing so performs certain validity checks including (but not limited to): +// - # (stringization) is followed by a macro parameter +// +// Returns a nullptr if an invalid sequence of tokens is encountered or returns +// a pointer to a MacroInfo object. - Token MacroNameTok; - bool MacroShadowsKeyword; - ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword); - - // Error reading macro name? If so, diagnostic already issued. 
- if (MacroNameTok.is(tok::eod)) - return; +MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( + const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) { Token LastTok = MacroNameTok; - - // If we are supposed to keep comments in #defines, reenable comment saving - // mode. - if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments); - // Create the new macro. - MacroInfo *MI = AllocateMacroInfo(MacroNameTok.getLocation()); + MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation()); Token Tok; LexUnexpandedToken(Tok); @@ -2323,11 +2307,11 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok, } else if (Tok.is(tok::l_paren)) { // This is a function-like macro definition. Read the argument list. MI->setIsFunctionLike(); - if (ReadMacroDefinitionArgList(MI, LastTok)) { + if (ReadMacroParameterList(MI, LastTok)) { // Throw away the rest of the line. if (CurPPLexer->ParsingPreprocessorDirective) DiscardUntilEndOfDirective(); - return; + return nullptr; } // If this is a definition of a variadic C99 function-like macro, not using @@ -2434,7 +2418,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok, // Check for a valid macro arg identifier. if (Tok.getIdentifierInfo() == nullptr || - MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) { + MI->getParameterNum(Tok.getIdentifierInfo()) == -1) { // If this is assembler-with-cpp mode, we accept random gibberish after // the '#' because '#' is often a comment character. However, change @@ -2450,7 +2434,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok, // Disable __VA_ARGS__ again. Ident__VA_ARGS__->setIsPoisoned(true); - return; + return nullptr; } } @@ -2463,15 +2447,39 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok, LexUnexpandedToken(Tok); } } + MI->setDefinitionEndLoc(LastTok.getLocation()); + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + + return MI; +} +/// HandleDefineDirective - Implements \#define. 
This consumes the entire macro +/// line then lets the caller lex the next real token. +void Preprocessor::HandleDefineDirective( + Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) { + ++NumDefined; + + Token MacroNameTok; + bool MacroShadowsKeyword; + ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.is(tok::eod)) + return; + + // If we are supposed to keep comments in #defines, reenable comment saving + // mode. + if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments); + + MacroInfo *const MI = ReadOptionalMacroParameterListAndBody( + MacroNameTok, ImmediatelyAfterHeaderGuard); + + if (!MI) return; if (MacroShadowsKeyword && !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) { Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword); - } - - // Disable __VA_ARGS__ again. - Ident__VA_ARGS__->setIsPoisoned(true); - + } // Check that there is no paste (##) operator at the beginning or end of the // replacement list. unsigned NumTokens = MI->getNumTokens(); @@ -2486,7 +2494,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok, } } - MI->setDefinitionEndLoc(LastTok.getLocation()); + // Finally, if this identifier already had a macro defined for it, verify that // the macro bodies are identical, and issue diagnostics if they are not. diff --git a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp index 12f5084298df..d8431827e9cd 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp @@ -237,35 +237,32 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.setCodeCompletionReached(); PP.LexNonComment(PeekTok); } - - // If this token's spelling is a pp-identifier, check to see if it is - // 'defined' or if it is a macro. 
Note that we check here because many - // keywords are pp-identifiers, so we can't check the kind. - if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { - // Handle "defined X" and "defined(X)". - if (II->isStr("defined")) - return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP); - - // If this identifier isn't 'defined' or one of the special - // preprocessor keywords and it wasn't macro expanded, it turns - // into a simple 0, unless it is the C++ keyword "true", in which case it - // turns into "1". - if (ValueLive && - II->getTokenID() != tok::kw_true && - II->getTokenID() != tok::kw_false) - PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II; - Result.Val = II->getTokenID() == tok::kw_true; - Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0. - Result.setIdentifier(II); - Result.setRange(PeekTok.getLocation()); - DT.IncludedUndefinedIds = (II->getTokenID() != tok::kw_true && - II->getTokenID() != tok::kw_false); - PP.LexNonComment(PeekTok); - return false; - } switch (PeekTok.getKind()) { - default: // Non-value token. + default: + // If this token's spelling is a pp-identifier, check to see if it is + // 'defined' or if it is a macro. Note that we check here because many + // keywords are pp-identifiers, so we can't check the kind. + if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { + // Handle "defined X" and "defined(X)". + if (II->isStr("defined")) + return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP); + + if (!II->isCPlusPlusOperatorKeyword()) { + // If this identifier isn't 'defined' or one of the special + // preprocessor keywords and it wasn't macro expanded, it turns + // into a simple 0 + if (ValueLive) + PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II; + Result.Val = 0; + Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0. 
+ Result.setIdentifier(II); + Result.setRange(PeekTok.getLocation()); + DT.IncludedUndefinedIds = true; + PP.LexNonComment(PeekTok); + return false; + } + } PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr); return true; case tok::eod: @@ -481,6 +478,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, DT.State = DefinedTracker::DefinedMacro; return false; } + case tok::kw_true: + case tok::kw_false: + Result.Val = PeekTok.getKind() == tok::kw_true; + Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0. + Result.setIdentifier(PeekTok.getIdentifierInfo()); + Result.setRange(PeekTok.getLocation()); + PP.LexNonComment(PeekTok); + return false; // FIXME: Handle #assert } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp index 8af9a50cc204..3f8ede23da56 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -412,7 +412,7 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, // If this is a function-like macro invocation, it's safe to trivially expand // as long as the identifier is not a macro argument. - return std::find(MI->arg_begin(), MI->arg_end(), II) == MI->arg_end(); + return std::find(MI->param_begin(), MI->param_end(), II) == MI->param_end(); } /// isNextPPTokenLParen - Determine whether the next preprocessor token to be @@ -492,7 +492,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Preprocessor directives used inside macro arguments are not portable, and // this enables the warning. InMacroArgs = true; - Args = ReadFunctionLikeMacroArgs(Identifier, MI, ExpansionEnd); + Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd); // Finished parsing args. 
InMacroArgs = false; @@ -745,11 +745,11 @@ static bool GenerateNewArgTokens(Preprocessor &PP, /// token is the '(' of the macro, this method is invoked to read all of the /// actual arguments specified for the macro invocation. This returns null on /// error. -MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, +MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, SourceLocation &MacroEnd) { // The number of fixed arguments to parse. - unsigned NumFixedArgsLeft = MI->getNumArgs(); + unsigned NumFixedArgsLeft = MI->getNumParams(); bool isVariadic = MI->isVariadic(); // Outer loop, while there are more arguments, keep reading them. @@ -889,7 +889,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, // Okay, we either found the r_paren. Check to see if we parsed too few // arguments. - unsigned MinArgsExpected = MI->getNumArgs(); + unsigned MinArgsExpected = MI->getNumParams(); // If this is not a variadic macro, and too many args were specified, emit // an error. diff --git a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp index 63f39524d12a..d1dc8e1c0010 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp @@ -712,14 +712,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { II.setIsFutureCompatKeyword(false); } - // C++ 2.11p2: If this is an alternative representation of a C++ operator, - // then we act as if it is the actual operator and not the textual - // representation of it. - if (II.isCPlusPlusOperatorKeyword() && - !(getLangOpts().MSVCCompat && - getSourceManager().isInSystemHeader(Identifier.getLocation()))) - Identifier.setIdentifierInfo(nullptr); - // If this is an extension token, diagnose its use. // We avoid diagnosing tokens that originate from macro definitions. 
// FIXME: This warning is disabled in cases where it shouldn't be, diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp index 049e046cece1..c2e49ba919a9 100644 --- a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp @@ -67,7 +67,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, // If this is a function-like macro, expand the arguments and change // Tokens to point to the expanded tokens. - if (Macro->isFunctionLike() && Macro->getNumArgs()) + if (Macro->isFunctionLike() && Macro->getNumParams()) ExpandFunctionArguments(); // Mark the macro as currently disabled, so that it is not recursively @@ -122,7 +122,7 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( SmallVectorImpl &ResultToks, bool HasPasteOperator, MacroInfo *Macro, unsigned MacroArgNo, Preprocessor &PP) { // Is the macro argument __VA_ARGS__? - if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1) + if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1) return false; // In Microsoft-compatibility mode, a comma is removed in the expansion @@ -137,7 +137,7 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( // with GNU extensions, it is removed regardless of named arguments. // Microsoft also appears to support this extension, unofficially. if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode - && Macro->getNumArgs() < 2) + && Macro->getNumParams() < 2) return false; // Is a comma available to be removed? 
@@ -193,7 +193,7 @@ void TokenLexer::ExpandFunctionArguments() { NextTokGetsSpace = true; if (CurTok.isOneOf(tok::hash, tok::hashat)) { - int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo()); + int ArgNo = Macro->getParameterNum(Tokens[i+1].getIdentifierInfo()); assert(ArgNo != -1 && "Token following # is not an argument?"); SourceLocation ExpansionLocStart = @@ -237,7 +237,7 @@ void TokenLexer::ExpandFunctionArguments() { // Otherwise, if this is not an argument token, just add the token to the // output buffer. IdentifierInfo *II = CurTok.getIdentifierInfo(); - int ArgNo = II ? Macro->getArgumentNum(II) : -1; + int ArgNo = II ? Macro->getParameterNum(II) : -1; if (ArgNo == -1) { // This isn't an argument, just add it. ResultToks.push_back(CurTok); @@ -330,7 +330,7 @@ void TokenLexer::ExpandFunctionArguments() { // expansion. if (NonEmptyPasteBefore && ResultToks.size() >= 2 && ResultToks[ResultToks.size()-2].is(tok::comma) && - (unsigned)ArgNo == Macro->getNumArgs()-1 && + (unsigned)ArgNo == Macro->getNumParams()-1 && Macro->isVariadic()) { VaArgsPseudoPaste = true; // Remove the paste operator, report use of the extension. diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp index f7410b8a092a..01b1bf48e473 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp @@ -1007,6 +1007,10 @@ IdentifierInfo *Parser::ParseObjCSelectorPiece(SourceLocation &SelectorLoc) { switch (Tok.getKind()) { default: return nullptr; + case tok::colon: + // Empty selector piece uses the location of the ':'. 
+ SelectorLoc = Tok.getLocation(); + return nullptr; case tok::ampamp: case tok::ampequal: case tok::amp: diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp index 2e5e36242ed5..d9a088595ab7 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp @@ -1102,7 +1102,7 @@ bool Parser::ParseOpenMPSimpleVarList( /// simdlen-clause | threads-clause | simd-clause | num_teams-clause | /// thread_limit-clause | priority-clause | grainsize-clause | /// nogroup-clause | num_tasks-clause | hint-clause | to-clause | -/// from-clause | is_device_ptr-clause +/// from-clause | is_device_ptr-clause | task_reduction-clause /// OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, bool FirstClause) { @@ -1220,6 +1220,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: @@ -1585,7 +1586,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, BalancedDelimiterTracker LinearT(*this, tok::l_paren, tok::annot_pragma_openmp_end); // Handle reduction-identifier for reduction clause. 
- if (Kind == OMPC_reduction) { + if (Kind == OMPC_reduction || Kind == OMPC_task_reduction) { ColonProtectionRAIIObject ColonRAII(*this); if (getLangOpts().CPlusPlus) ParseOptionalCXXScopeSpecifier(Data.ReductionIdScopeSpec, @@ -1733,13 +1734,13 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, Diag(Tok, diag::warn_pragma_expected_colon) << "map type"; } - bool IsComma = - (Kind != OMPC_reduction && Kind != OMPC_depend && Kind != OMPC_map) || - (Kind == OMPC_reduction && !InvalidReductionId) || - (Kind == OMPC_map && Data.MapType != OMPC_MAP_unknown && - (!MapTypeModifierSpecified || - Data.MapTypeModifier == OMPC_MAP_always)) || - (Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown); + bool IsComma = (Kind != OMPC_reduction && Kind != OMPC_task_reduction && + Kind != OMPC_depend && Kind != OMPC_map) || + (Kind == OMPC_reduction && !InvalidReductionId) || + (Kind == OMPC_map && Data.MapType != OMPC_MAP_unknown && + (!MapTypeModifierSpecified || + Data.MapTypeModifier == OMPC_MAP_always)) || + (Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown); const bool MayHaveTail = (Kind == OMPC_linear || Kind == OMPC_aligned); while (IsComma || (Tok.isNot(tok::r_paren) && Tok.isNot(tok::colon) && Tok.isNot(tok::annot_pragma_openmp_end))) { @@ -1795,7 +1796,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, } /// \brief Parsing of OpenMP clause 'private', 'firstprivate', 'lastprivate', -/// 'shared', 'copyin', 'copyprivate', 'flush' or 'reduction'. +/// 'shared', 'copyin', 'copyprivate', 'flush', 'reduction' or 'task_reduction'. 
/// /// private-clause: /// 'private' '(' list ')' @@ -1811,6 +1812,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, /// 'aligned' '(' list [ ':' alignment ] ')' /// reduction-clause: /// 'reduction' '(' reduction-identifier ':' list ')' +/// task_reduction-clause: +/// 'task_reduction' '(' reduction-identifier ':' list ')' /// copyprivate-clause: /// 'copyprivate' '(' list ')' /// flush-clause: diff --git a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp index a55cdcccee5d..e4e84fcec954 100644 --- a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp @@ -1082,8 +1082,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { !S.getLangOpts().ZVector) S.Diag(TSTLoc, diag::err_invalid_vector_double_decl_spec); } else if (TypeSpecType == TST_float) { - // vector float is unsupported for ZVector. - if (S.getLangOpts().ZVector) + // vector float is unsupported for ZVector unless we have the + // vector-enhancements facility 1 (ISA revision 12). + if (S.getLangOpts().ZVector && + !S.Context.getTargetInfo().hasFeature("arch12")) S.Diag(TSTLoc, diag::err_invalid_vector_float_decl_spec); } else if (TypeSpecWidth == TSW_long) { // vector long is unsupported for ZVector and deprecated for AltiVec. diff --git a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp index dc9f977d41ac..6f0db6ce1c6a 100644 --- a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp @@ -850,7 +850,8 @@ void Sema::ActOnEndOfTranslationUnit() { emitAndClearUnusedLocalTypedefWarnings(); // Modules don't need any of the checking below. 
- TUScope = nullptr; + if (!PP.isIncrementalProcessingEnabled()) + TUScope = nullptr; return; } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp index 8446601334ee..b2223b755061 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp @@ -760,6 +760,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (CheckObjCString(TheCall->getArg(0))) return ExprError(); break; + case Builtin::BI__builtin_ms_va_start: case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: if (SemaBuiltinVAStart(BuiltinID, TheCall)) @@ -1739,9 +1740,11 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vfaezbs: case SystemZ::BI__builtin_s390_vfaezhs: case SystemZ::BI__builtin_s390_vfaezfs: i = 2; l = 0; u = 15; break; + case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15) || SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case SystemZ::BI__builtin_s390_vftcisb: case SystemZ::BI__builtin_s390_vftcidb: i = 1; l = 0; u = 4095; break; case SystemZ::BI__builtin_s390_vlbb: i = 1; l = 0; u = 15; break; case SystemZ::BI__builtin_s390_vpdi: i = 2; l = 0; u = 15; break; @@ -1758,6 +1761,11 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vstrczbs: case SystemZ::BI__builtin_s390_vstrczhs: case SystemZ::BI__builtin_s390_vstrczfs: i = 3; l = 0; u = 15; break; + case SystemZ::BI__builtin_s390_vmslg: i = 3; l = 0; u = 15; break; + case SystemZ::BI__builtin_s390_vfminsb: + case SystemZ::BI__builtin_s390_vfmaxsb: + case SystemZ::BI__builtin_s390_vfmindb: + case SystemZ::BI__builtin_s390_vfmaxdb: i = 2; l = 0; u = 15; break; } return SemaBuiltinConstantArgRange(TheCall, i, l, u); } @@ -2095,9 +2103,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, 
CallExpr *TheCall) { if (BuiltinID == X86::BI__builtin_cpu_supports) return SemaBuiltinCpuSupports(*this, TheCall); - if (BuiltinID == X86::BI__builtin_ms_va_start) - return SemaBuiltinVAStart(BuiltinID, TheCall); - // If the intrinsic has rounding or SAE make sure its valid. if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall)) return true; @@ -3622,24 +3627,25 @@ ExprResult Sema::CheckOSLogFormatStringArg(Expr *Arg) { static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) { const llvm::Triple &TT = S.Context.getTargetInfo().getTriple(); bool IsX64 = TT.getArch() == llvm::Triple::x86_64; + bool IsAArch64 = TT.getArch() == llvm::Triple::aarch64; bool IsWindows = TT.isOSWindows(); - bool IsMSVAStart = BuiltinID == X86::BI__builtin_ms_va_start; - if (IsX64) { + bool IsMSVAStart = BuiltinID == Builtin::BI__builtin_ms_va_start; + if (IsX64 || IsAArch64) { clang::CallingConv CC = CC_C; if (const FunctionDecl *FD = S.getCurFunctionDecl()) CC = FD->getType()->getAs()->getCallConv(); if (IsMSVAStart) { // Don't allow this in System V ABI functions. - if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_X86_64Win64)) + if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_Win64)) return S.Diag(Fn->getLocStart(), diag::err_ms_va_start_used_in_sysv_function); } else { - // On x86-64 Unix, don't allow this in Win64 ABI functions. + // On x86-64/AArch64 Unix, don't allow this in Win64 ABI functions. // On x64 Windows, don't allow this in System V ABI functions. // (Yes, that means there's no corresponding way to support variadic // System V ABI functions on Windows.) 
if ((IsWindows && CC == CC_X86_64SysV) || - (!IsWindows && CC == CC_X86_64Win64)) + (!IsWindows && CC == CC_Win64)) return S.Diag(Fn->getLocStart(), diag::err_va_start_used_in_wrong_abi_function) << !IsWindows; @@ -3648,7 +3654,7 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) { } if (IsMSVAStart) - return S.Diag(Fn->getLocStart(), diag::err_x86_builtin_64_only); + return S.Diag(Fn->getLocStart(), diag::err_builtin_x64_aarch64_only); return false; } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp index 83c3bd27596c..3a53f251b096 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp @@ -2738,7 +2738,7 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx, // Format a function-like macro with placeholders for the arguments. Result.AddChunk(CodeCompletionString::CK_LeftParen); - MacroInfo::arg_iterator A = MI->arg_begin(), AEnd = MI->arg_end(); + MacroInfo::param_iterator A = MI->param_begin(), AEnd = MI->param_end(); // C99 variadic macros add __VA_ARGS__ at the end. Skip it. 
if (MI->isC99Varargs()) { @@ -2749,8 +2749,8 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx, } } - for (MacroInfo::arg_iterator A = MI->arg_begin(); A != AEnd; ++A) { - if (A != MI->arg_begin()) + for (MacroInfo::param_iterator A = MI->param_begin(); A != AEnd; ++A) { + if (A != MI->param_begin()) Result.AddChunk(CodeCompletionString::CK_Comma); if (MI->isVariadic() && (A+1) == AEnd) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp index 5fb79a6bf630..2a310bf41c70 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -4280,7 +4280,7 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC, case AttributeList::AT_RegCall: CC = CC_X86RegCall; break; case AttributeList::AT_MSABI: CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_C : - CC_X86_64Win64; + CC_Win64; break; case AttributeList::AT_SysVABI: CC = Context.getTargetInfo().getTriple().isOSWindows() ? 
CC_X86_64SysV : @@ -4679,6 +4679,16 @@ void Sema::AddNSConsumedAttr(SourceRange attrRange, Decl *D, CFConsumedAttr(attrRange, Context, spellingIndex)); } +bool Sema::checkNSReturnsRetainedReturnType(SourceLocation loc, + QualType type) { + if (isValidSubjectOfNSReturnsRetainedAttribute(type)) + return false; + + Diag(loc, diag::warn_ns_attribute_wrong_return_type) + << "'ns_returns_retained'" << 0 << 0; + return true; +} + static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, const AttributeList &Attr) { QualType returnType; @@ -4700,6 +4710,8 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, << Attr.getRange(); return; } + } else if (Attr.isUsedAsTypeAttr()) { + return; } else { AttributeDeclKind ExpectedDeclKind; switch (Attr.getKind()) { @@ -4743,6 +4755,9 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, } if (!typeOK) { + if (Attr.isUsedAsTypeAttr()) + return; + if (isa(D)) { S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type) << Attr.getName() << /*pointer-to-CF*/2 @@ -6838,6 +6853,50 @@ static const AvailabilityAttr *getAttrForPlatform(ASTContext &Context, return nullptr; } +/// The diagnostic we should emit for \c D, and the declaration that +/// originated it, or \c AR_Available. +/// +/// \param D The declaration to check. +/// \param Message If non-null, this will be populated with the message from +/// the availability attribute that is selected. +static std::pair +ShouldDiagnoseAvailabilityOfDecl(const NamedDecl *D, std::string *Message) { + AvailabilityResult Result = D->getAvailability(Message); + + // For typedefs, if the typedef declaration appears available look + // to the underlying type to see if it is more restrictive. 
+ while (const TypedefNameDecl *TD = dyn_cast(D)) { + if (Result == AR_Available) { + if (const TagType *TT = TD->getUnderlyingType()->getAs()) { + D = TT->getDecl(); + Result = D->getAvailability(Message); + continue; + } + } + break; + } + + // Forward class declarations get their attributes from their definition. + if (const ObjCInterfaceDecl *IDecl = dyn_cast(D)) { + if (IDecl->getDefinition()) { + D = IDecl->getDefinition(); + Result = D->getAvailability(Message); + } + } + + if (const auto *ECD = dyn_cast(D)) + if (Result == AR_Available) { + const DeclContext *DC = ECD->getDeclContext(); + if (const auto *TheEnumDecl = dyn_cast(DC)) { + Result = TheEnumDecl->getAvailability(Message); + D = TheEnumDecl; + } + } + + return {Result, D}; +} + + /// \brief whether we should emit a diagnostic for \c K and \c DeclVersion in /// the context of \c Ctx. For example, we should emit an unavailable diagnostic /// in a deprecated context, but not the other way around. @@ -7205,24 +7264,24 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) { curPool->steal(pool); } -void Sema::EmitAvailabilityWarning(AvailabilityResult AR, - const NamedDecl *ReferringDecl, - const NamedDecl *OffendingDecl, - StringRef Message, SourceLocation Loc, - const ObjCInterfaceDecl *UnknownObjCClass, - const ObjCPropertyDecl *ObjCProperty, - bool ObjCPropertyAccess) { +static void EmitAvailabilityWarning(Sema &S, AvailabilityResult AR, + const NamedDecl *ReferringDecl, + const NamedDecl *OffendingDecl, + StringRef Message, SourceLocation Loc, + const ObjCInterfaceDecl *UnknownObjCClass, + const ObjCPropertyDecl *ObjCProperty, + bool ObjCPropertyAccess) { // Delay if we're currently parsing a declaration. 
- if (DelayedDiagnostics.shouldDelayDiagnostics()) { - DelayedDiagnostics.add( + if (S.DelayedDiagnostics.shouldDelayDiagnostics()) { + S.DelayedDiagnostics.add( DelayedDiagnostic::makeAvailability( AR, Loc, ReferringDecl, OffendingDecl, UnknownObjCClass, ObjCProperty, Message, ObjCPropertyAccess)); return; } - Decl *Ctx = cast(getCurLexicalContext()); - DoEmitAvailabilityWarning(*this, AR, Ctx, ReferringDecl, OffendingDecl, + Decl *Ctx = cast(S.getCurLexicalContext()); + DoEmitAvailabilityWarning(S, AR, Ctx, ReferringDecl, OffendingDecl, Message, Loc, UnknownObjCClass, ObjCProperty, ObjCPropertyAccess); } @@ -7379,7 +7438,7 @@ void DiagnoseUnguardedAvailability::DiagnoseDeclAvailability( AvailabilityResult Result; const NamedDecl *OffendingDecl; std::tie(Result, OffendingDecl) = - SemaRef.ShouldDiagnoseAvailabilityOfDecl(D, nullptr); + ShouldDiagnoseAvailabilityOfDecl(D, nullptr); if (Result != AR_Available) { // All other diagnostic kinds have already been handled in // DiagnoseAvailabilityOfDecl. @@ -7557,3 +7616,44 @@ void Sema::DiagnoseUnguardedAvailabilityViolations(Decl *D) { DiagnoseUnguardedAvailability(*this, D).IssueDiagnostics(Body); } + +void Sema::DiagnoseAvailabilityOfDecl(NamedDecl *D, SourceLocation Loc, + const ObjCInterfaceDecl *UnknownObjCClass, + bool ObjCPropertyAccess, + bool AvoidPartialAvailabilityChecks) { + std::string Message; + AvailabilityResult Result; + const NamedDecl* OffendingDecl; + // See if this declaration is unavailable, deprecated, or partial. + std::tie(Result, OffendingDecl) = ShouldDiagnoseAvailabilityOfDecl(D, &Message); + if (Result == AR_Available) + return; + + if (Result == AR_NotYetIntroduced) { + if (AvoidPartialAvailabilityChecks) + return; + + // We need to know the @available context in the current function to + // diagnose this use, let DiagnoseUnguardedAvailabilityViolations do that + // when we're done parsing the current function. 
+ if (getCurFunctionOrMethodDecl()) { + getEnclosingFunction()->HasPotentialAvailabilityViolations = true; + return; + } else if (getCurBlock() || getCurLambda()) { + getCurFunction()->HasPotentialAvailabilityViolations = true; + return; + } + } + + const ObjCPropertyDecl *ObjCPDecl = nullptr; + if (const ObjCMethodDecl *MD = dyn_cast(D)) { + if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) { + AvailabilityResult PDeclResult = PD->getAvailability(nullptr); + if (PDeclResult == Result) + ObjCPDecl = PD; + } + } + + EmitAvailabilityWarning(*this, Result, D, OffendingDecl, Message, Loc, + UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess); +} diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp index 778b8062f68c..967573011d0d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp @@ -248,19 +248,41 @@ bool Sema::CheckARCMethodDecl(ObjCMethodDecl *method) { return false; } -static void DiagnoseObjCImplementedDeprecations(Sema &S, - NamedDecl *ND, - SourceLocation ImplLoc, - int select) { - if (ND && ND->isDeprecated()) { - S.Diag(ImplLoc, diag::warn_deprecated_def) << select; - if (select == 0) +static void DiagnoseObjCImplementedDeprecations(Sema &S, const NamedDecl *ND, + SourceLocation ImplLoc) { + if (!ND) + return; + bool IsCategory = false; + AvailabilityResult Availability = ND->getAvailability(); + if (Availability != AR_Deprecated) { + if (isa(ND)) { + if (Availability != AR_Unavailable) + return; + // Warn about implementing unavailable methods. + S.Diag(ImplLoc, diag::warn_unavailable_def); S.Diag(ND->getLocation(), diag::note_method_declared_at) - << ND->getDeclName(); - else - S.Diag(ND->getLocation(), diag::note_previous_decl) - << (isa(ND) ? 
"category" : "class"); + << ND->getDeclName(); + return; + } + if (const auto *CD = dyn_cast(ND)) { + if (!CD->getClassInterface()->isDeprecated()) + return; + ND = CD->getClassInterface(); + IsCategory = true; + } else + return; } + S.Diag(ImplLoc, diag::warn_deprecated_def) + << (isa(ND) + ? /*Method*/ 0 + : isa(ND) || IsCategory ? /*Category*/ 2 + : /*Class*/ 1); + if (isa(ND)) + S.Diag(ND->getLocation(), diag::note_method_declared_at) + << ND->getDeclName(); + else + S.Diag(ND->getLocation(), diag::note_previous_decl) + << (isa(ND) ? "category" : "class"); } /// AddAnyMethodToGlobalPool - Add any method, instance or factory to global @@ -385,9 +407,7 @@ void Sema::ActOnStartOfObjCMethodDef(Scope *FnBodyScope, Decl *D) { // No need to issue deprecated warning if deprecated mehod in class/category // is being implemented in its own implementation (no overriding is involved). if (!ImplDeclOfMethodDecl || ImplDeclOfMethodDecl != ImplDeclOfMethodDef) - DiagnoseObjCImplementedDeprecations(*this, - dyn_cast(IMD), - MDecl->getLocation(), 0); + DiagnoseObjCImplementedDeprecations(*this, IMD, MDecl->getLocation()); } if (MDecl->getMethodFamily() == OMF_init) { @@ -1873,10 +1893,8 @@ Decl *Sema::ActOnStartCategoryImplementation( CatIDecl->setImplementation(CDecl); // Warn on implementating category of deprecated class under // -Wdeprecated-implementations flag. - DiagnoseObjCImplementedDeprecations( - *this, - CatIDecl->isDeprecated() ? CatIDecl : dyn_cast(IDecl), - CDecl->getLocation(), 2); + DiagnoseObjCImplementedDeprecations(*this, CatIDecl, + CDecl->getLocation()); } } @@ -1996,9 +2014,7 @@ Decl *Sema::ActOnStartClassImplementation( PushOnScopeChains(IMPDecl, TUScope); // Warn on implementating deprecated class under // -Wdeprecated-implementations flag. 
- DiagnoseObjCImplementedDeprecations(*this, - dyn_cast(IDecl), - IMPDecl->getLocation(), 1); + DiagnoseObjCImplementedDeprecations(*this, IDecl, IMPDecl->getLocation()); } // If the superclass has the objc_runtime_visible attribute, we diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp index 8016bf99889f..ead80b61586a 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp @@ -87,82 +87,6 @@ static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { } } -std::pair -Sema::ShouldDiagnoseAvailabilityOfDecl(const NamedDecl *D, - std::string *Message) { - AvailabilityResult Result = D->getAvailability(Message); - - // For typedefs, if the typedef declaration appears available look - // to the underlying type to see if it is more restrictive. - while (const TypedefNameDecl *TD = dyn_cast(D)) { - if (Result == AR_Available) { - if (const TagType *TT = TD->getUnderlyingType()->getAs()) { - D = TT->getDecl(); - Result = D->getAvailability(Message); - continue; - } - } - break; - } - - // Forward class declarations get their attributes from their definition. - if (const ObjCInterfaceDecl *IDecl = dyn_cast(D)) { - if (IDecl->getDefinition()) { - D = IDecl->getDefinition(); - Result = D->getAvailability(Message); - } - } - - if (const auto *ECD = dyn_cast(D)) - if (Result == AR_Available) { - const DeclContext *DC = ECD->getDeclContext(); - if (const auto *TheEnumDecl = dyn_cast(DC)) { - Result = TheEnumDecl->getAvailability(Message); - D = TheEnumDecl; - } - } - - return {Result, D}; -} - -static void -DiagnoseAvailabilityOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc, - const ObjCInterfaceDecl *UnknownObjCClass, - bool ObjCPropertyAccess, - bool AvoidPartialAvailabilityChecks = false) { - std::string Message; - AvailabilityResult Result; - const NamedDecl* OffendingDecl; - // See if this declaration is unavailable, deprecated, or partial. 
- std::tie(Result, OffendingDecl) = S.ShouldDiagnoseAvailabilityOfDecl(D, &Message); - if (Result == AR_Available) - return; - - if (Result == AR_NotYetIntroduced) { - if (AvoidPartialAvailabilityChecks) - return; - if (S.getCurFunctionOrMethodDecl()) { - S.getEnclosingFunction()->HasPotentialAvailabilityViolations = true; - return; - } else if (S.getCurBlock() || S.getCurLambda()) { - S.getCurFunction()->HasPotentialAvailabilityViolations = true; - return; - } - } - - const ObjCPropertyDecl *ObjCPDecl = nullptr; - if (const ObjCMethodDecl *MD = dyn_cast(D)) { - if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) { - AvailabilityResult PDeclResult = PD->getAvailability(nullptr); - if (PDeclResult == Result) - ObjCPDecl = PD; - } - } - - S.EmitAvailabilityWarning(Result, D, OffendingDecl, Message, Loc, - UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess); -} - /// \brief Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl->isDeleted()); @@ -363,8 +287,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc, return true; } - DiagnoseAvailabilityOfDecl(*this, D, Loc, UnknownObjCClass, - ObjCPropertyAccess, + DiagnoseAvailabilityOfDecl(D, Loc, UnknownObjCClass, ObjCPropertyAccess, AvoidPartialAvailabilityChecks); DiagnoseUnusedOfDecl(*this, D, Loc); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp index 62a771bcffa0..e1e85dfd5e55 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp @@ -814,53 +814,185 @@ static void setImpliedPropertyAttributeForReadOnlyProperty( property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak); } -/// DiagnosePropertyMismatchDeclInProtocols - diagnose properties declared -/// in inherited protocols with mismatched types. Since any of them can -/// be candidate for synthesis. 
-static void -DiagnosePropertyMismatchDeclInProtocols(Sema &S, SourceLocation AtLoc, +static bool +isIncompatiblePropertyAttribute(unsigned Attr1, unsigned Attr2, + ObjCPropertyDecl::PropertyAttributeKind Kind) { + return (Attr1 & Kind) != (Attr2 & Kind); +} + +static bool areIncompatiblePropertyAttributes(unsigned Attr1, unsigned Attr2, + unsigned Kinds) { + return ((Attr1 & Kinds) != 0) != ((Attr2 & Kinds) != 0); +} + +/// SelectPropertyForSynthesisFromProtocols - Finds the most appropriate +/// property declaration that should be synthesised in all of the inherited +/// protocols. It also diagnoses properties declared in inherited protocols with +/// mismatched types or attributes, since any of them can be candidate for +/// synthesis. +static ObjCPropertyDecl * +SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc, ObjCInterfaceDecl *ClassDecl, ObjCPropertyDecl *Property) { - ObjCInterfaceDecl::ProtocolPropertyMap PropMap; + assert(isa(Property->getDeclContext()) && + "Expected a property from a protocol"); + ObjCInterfaceDecl::ProtocolPropertySet ProtocolSet; + ObjCInterfaceDecl::PropertyDeclOrder Properties; for (const auto *PI : ClassDecl->all_referenced_protocols()) { if (const ObjCProtocolDecl *PDecl = PI->getDefinition()) - PDecl->collectInheritedProtocolProperties(Property, PropMap); + PDecl->collectInheritedProtocolProperties(Property, ProtocolSet, + Properties); } - if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass()) + if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass()) { while (SDecl) { for (const auto *PI : SDecl->all_referenced_protocols()) { if (const ObjCProtocolDecl *PDecl = PI->getDefinition()) - PDecl->collectInheritedProtocolProperties(Property, PropMap); + PDecl->collectInheritedProtocolProperties(Property, ProtocolSet, + Properties); } SDecl = SDecl->getSuperClass(); } - - if (PropMap.empty()) - return; - + } + + if (Properties.empty()) + return Property; + + ObjCPropertyDecl *OriginalProperty = Property; + 
size_t SelectedIndex = 0; + for (const auto &Prop : llvm::enumerate(Properties)) { + // Select the 'readwrite' property if such property exists. + if (Property->isReadOnly() && !Prop.value()->isReadOnly()) { + Property = Prop.value(); + SelectedIndex = Prop.index(); + } + } + if (Property != OriginalProperty) { + // Check that the old property is compatible with the new one. + Properties[SelectedIndex] = OriginalProperty; + } + QualType RHSType = S.Context.getCanonicalType(Property->getType()); - bool FirsTime = true; - for (ObjCInterfaceDecl::ProtocolPropertyMap::iterator - I = PropMap.begin(), E = PropMap.end(); I != E; I++) { - ObjCPropertyDecl *Prop = I->second; + unsigned OriginalAttributes = Property->getPropertyAttributes(); + enum MismatchKind { + IncompatibleType = 0, + HasNoExpectedAttribute, + HasUnexpectedAttribute, + DifferentGetter, + DifferentSetter + }; + // Represents a property from another protocol that conflicts with the + // selected declaration. + struct MismatchingProperty { + const ObjCPropertyDecl *Prop; + MismatchKind Kind; + StringRef AttributeName; + }; + SmallVector Mismatches; + for (ObjCPropertyDecl *Prop : Properties) { + // Verify the property attributes. + unsigned Attr = Prop->getPropertyAttributes(); + if (Attr != OriginalAttributes) { + auto Diag = [&](bool OriginalHasAttribute, StringRef AttributeName) { + MismatchKind Kind = OriginalHasAttribute ? 
HasNoExpectedAttribute + : HasUnexpectedAttribute; + Mismatches.push_back({Prop, Kind, AttributeName}); + }; + if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr, + ObjCPropertyDecl::OBJC_PR_copy)) { + Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_copy, "copy"); + continue; + } + if (areIncompatiblePropertyAttributes( + OriginalAttributes, Attr, ObjCPropertyDecl::OBJC_PR_retain | + ObjCPropertyDecl::OBJC_PR_strong)) { + Diag(OriginalAttributes & (ObjCPropertyDecl::OBJC_PR_retain | + ObjCPropertyDecl::OBJC_PR_strong), + "retain (or strong)"); + continue; + } + if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr, + ObjCPropertyDecl::OBJC_PR_atomic)) { + Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_atomic, "atomic"); + continue; + } + } + if (Property->getGetterName() != Prop->getGetterName()) { + Mismatches.push_back({Prop, DifferentGetter, ""}); + continue; + } + if (!Property->isReadOnly() && !Prop->isReadOnly() && + Property->getSetterName() != Prop->getSetterName()) { + Mismatches.push_back({Prop, DifferentSetter, ""}); + continue; + } QualType LHSType = S.Context.getCanonicalType(Prop->getType()); if (!S.Context.propertyTypesAreCompatible(LHSType, RHSType)) { bool IncompatibleObjC = false; QualType ConvertedType; if (!S.isObjCPointerConversion(RHSType, LHSType, ConvertedType, IncompatibleObjC) || IncompatibleObjC) { - if (FirsTime) { - S.Diag(Property->getLocation(), diag::warn_protocol_property_mismatch) - << Property->getType(); - FirsTime = false; - } - S.Diag(Prop->getLocation(), diag::note_protocol_property_declare) - << Prop->getType(); + Mismatches.push_back({Prop, IncompatibleType, ""}); + continue; } } } - if (!FirsTime && AtLoc.isValid()) + + if (Mismatches.empty()) + return Property; + + // Diagnose incompability. + { + bool HasIncompatibleAttributes = false; + for (const auto &Note : Mismatches) + HasIncompatibleAttributes = + Note.Kind != IncompatibleType ? 
true : HasIncompatibleAttributes; + // Promote the warning to an error if there are incompatible attributes or + // incompatible types together with readwrite/readonly incompatibility. + auto Diag = S.Diag(Property->getLocation(), + Property != OriginalProperty || HasIncompatibleAttributes + ? diag::err_protocol_property_mismatch + : diag::warn_protocol_property_mismatch); + Diag << Mismatches[0].Kind; + switch (Mismatches[0].Kind) { + case IncompatibleType: + Diag << Property->getType(); + break; + case HasNoExpectedAttribute: + case HasUnexpectedAttribute: + Diag << Mismatches[0].AttributeName; + break; + case DifferentGetter: + Diag << Property->getGetterName(); + break; + case DifferentSetter: + Diag << Property->getSetterName(); + break; + } + } + for (const auto &Note : Mismatches) { + auto Diag = + S.Diag(Note.Prop->getLocation(), diag::note_protocol_property_declare) + << Note.Kind; + switch (Note.Kind) { + case IncompatibleType: + Diag << Note.Prop->getType(); + break; + case HasNoExpectedAttribute: + case HasUnexpectedAttribute: + Diag << Note.AttributeName; + break; + case DifferentGetter: + Diag << Note.Prop->getGetterName(); + break; + case DifferentSetter: + Diag << Note.Prop->getSetterName(); + break; + } + } + if (AtLoc.isValid()) S.Diag(AtLoc, diag::note_property_synthesize); + + return Property; } /// Determine whether any storage attributes were written on the property. 
@@ -996,8 +1128,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, } } if (Synthesize && isa(property->getDeclContext())) - DiagnosePropertyMismatchDeclInProtocols(*this, AtLoc, IDecl, property); - + property = SelectPropertyForSynthesisFromProtocols(*this, AtLoc, IDecl, + property); + } else if ((CatImplClass = dyn_cast(ClassImpDecl))) { if (Synthesize) { Diag(AtLoc, diag::err_synthesize_category_decl); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index 1e0b6c158348..01f574b6aeeb 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -1807,6 +1807,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(".lb.", KmpUInt64Ty), std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty), std::make_pair(".liter.", KmpInt32Ty), + std::make_pair(".reductions.", + Context.VoidPtrTy.withConst().withRestrict()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, @@ -2498,9 +2500,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPTaskwaitDirective(StartLoc, EndLoc); break; case OMPD_taskgroup: - assert(ClausesWithImplicit.empty() && - "No clauses are allowed for 'omp taskgroup' directive"); - Res = ActOnOpenMPTaskgroupDirective(AStmt, StartLoc, EndLoc); + Res = ActOnOpenMPTaskgroupDirective(ClausesWithImplicit, AStmt, StartLoc, + EndLoc); break; case OMPD_flush: assert(AStmt == nullptr && @@ -5067,7 +5068,8 @@ StmtResult Sema::ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc, return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc); } -StmtResult Sema::ActOnOpenMPTaskgroupDirective(Stmt *AStmt, +StmtResult Sema::ActOnOpenMPTaskgroupDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { if (!AStmt) @@ -5077,7 +5079,8 @@ StmtResult 
Sema::ActOnOpenMPTaskgroupDirective(Stmt *AStmt, getCurFunction()->setHasBranchProtectedScope(); - return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, AStmt); + return OMPTaskgroupDirective::Create(Context, StartLoc, EndLoc, Clauses, + AStmt); } StmtResult Sema::ActOnOpenMPFlushDirective(ArrayRef Clauses, @@ -6849,6 +6852,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: @@ -7152,6 +7156,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_default: case OMPC_proc_bind: @@ -7467,6 +7472,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: @@ -7624,6 +7630,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: @@ -7821,6 +7828,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_shared: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: @@ -7933,6 +7941,11 @@ OMPClause *Sema::ActOnOpenMPVarListClause( Res = ActOnOpenMPReductionClause(VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, ReductionId); break; + case OMPC_task_reduction: + Res = ActOnOpenMPTaskReductionClause(VarList, StartLoc, LParenLoc, ColonLoc, + EndLoc, ReductionIdScopeSpec, + ReductionId); + break; case OMPC_linear: Res = ActOnOpenMPLinearClause(VarList, TailExpr, StartLoc, LParenLoc, LinKind, DepLinMapLoc, ColonLoc, EndLoc); @@ -8901,15 +8914,66 @@ buildDeclareReductionRef(Sema 
&SemaRef, SourceLocation Loc, SourceRange Range, return ExprEmpty(); } -OMPClause *Sema::ActOnOpenMPReductionClause( +namespace { +/// Data for the reduction-based clauses. +struct ReductionData { + /// List of original reduction items. + SmallVector Vars; + /// List of private copies of the reduction items. + SmallVector Privates; + /// LHS expressions for the reduction_op expressions. + SmallVector LHSs; + /// RHS expressions for the reduction_op expressions. + SmallVector RHSs; + /// Reduction operation expression. + SmallVector ReductionOps; + /// List of captures for clause. + SmallVector ExprCaptures; + /// List of postupdate expressions. + SmallVector ExprPostUpdates; + ReductionData() = delete; + /// Reserves required memory for the reduction data. + ReductionData(unsigned Size) { + Vars.reserve(Size); + Privates.reserve(Size); + LHSs.reserve(Size); + RHSs.reserve(Size); + ReductionOps.reserve(Size); + ExprCaptures.reserve(Size); + ExprPostUpdates.reserve(Size); + } + /// Stores reduction item and reduction operation only (required for dependent + /// reduction item). + void push(Expr *Item, Expr *ReductionOp) { + Vars.emplace_back(Item); + Privates.emplace_back(nullptr); + LHSs.emplace_back(nullptr); + RHSs.emplace_back(nullptr); + ReductionOps.emplace_back(ReductionOp); + } + /// Stores reduction data. 
+ void push(Expr *Item, Expr *Private, Expr *LHS, Expr *RHS, + Expr *ReductionOp) { + Vars.emplace_back(Item); + Privates.emplace_back(Private); + LHSs.emplace_back(LHS); + RHSs.emplace_back(RHS); + ReductionOps.emplace_back(ReductionOp); + } +}; +} // namespace + +static bool ActOnOMPReductionKindClause( + Sema &S, DSAStackTy *Stack, OpenMPClauseKind ClauseKind, ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions) { + ArrayRef UnresolvedReductions, ReductionData &RD) { auto DN = ReductionId.getName(); auto OOK = DN.getCXXOverloadedOperator(); BinaryOperatorKind BOK = BO_Comma; + ASTContext &Context = S.Context; // OpenMP [2.14.3.6, reduction clause] // C // reduction-identifier is either an identifier or one of the following @@ -8993,13 +9057,6 @@ OMPClause *Sema::ActOnOpenMPReductionClause( ReductionIdRange.setBegin(ReductionIdScopeSpec.getBeginLoc()); ReductionIdRange.setEnd(ReductionId.getEndLoc()); - SmallVector Vars; - SmallVector Privates; - SmallVector LHSs; - SmallVector RHSs; - SmallVector ReductionOps; - SmallVector ExprCaptures; - SmallVector ExprPostUpdates; auto IR = UnresolvedReductions.begin(), ER = UnresolvedReductions.end(); bool FirstIter = true; for (auto RefExpr : VarList) { @@ -9017,27 +9074,23 @@ OMPClause *Sema::ActOnOpenMPReductionClause( SourceLocation ELoc; SourceRange ERange; Expr *SimpleRefExpr = RefExpr; - auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange, + auto Res = getPrivateItem(S, SimpleRefExpr, ELoc, ERange, /*AllowArraySection=*/true); if (Res.second) { - // It will be analyzed later. - Vars.push_back(RefExpr); - Privates.push_back(nullptr); - LHSs.push_back(nullptr); - RHSs.push_back(nullptr); // Try to find 'declare reduction' corresponding construct before using // builtin/overloaded operators. 
QualType Type = Context.DependentTy; CXXCastPath BasePath; ExprResult DeclareReductionRef = buildDeclareReductionRef( - *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec, + S, ELoc, ERange, Stack->getCurScope(), ReductionIdScopeSpec, ReductionId, Type, BasePath, IR == ER ? nullptr : *IR); - if (CurContext->isDependentContext() && + Expr *ReductionOp = nullptr; + if (S.CurContext->isDependentContext() && (DeclareReductionRef.isUnset() || isa(DeclareReductionRef.get()))) - ReductionOps.push_back(DeclareReductionRef.get()); - else - ReductionOps.push_back(nullptr); + ReductionOp = DeclareReductionRef.get(); + // It will be analyzed later. + RD.push(RefExpr, ReductionOp); } ValueDecl *D = Res.first; if (!D) @@ -9062,21 +9115,19 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // OpenMP [2.9.3.3, Restrictions, C/C++, p.3] // A variable that appears in a private clause must not have an incomplete // type or a reference type. - if (RequireCompleteType(ELoc, Type, - diag::err_omp_reduction_incomplete_type)) + if (S.RequireCompleteType(ELoc, Type, + diag::err_omp_reduction_incomplete_type)) continue; // OpenMP [2.14.3.6, reduction clause, Restrictions] // A list item that appears in a reduction clause must not be // const-qualified. if (Type.getNonReferenceType().isConstant(Context)) { - Diag(ELoc, diag::err_omp_const_reduction_list_item) - << getOpenMPClauseName(OMPC_reduction) << Type << ERange; + S.Diag(ELoc, diag::err_omp_const_reduction_list_item) << ERange; if (!ASE && !OASE) { - bool IsDecl = !VD || - VD->isThisDeclarationADefinition(Context) == - VarDecl::DeclarationOnly; - Diag(D->getLocation(), - IsDecl ? diag::note_previous_decl : diag::note_defined_here) + bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + VarDecl::DeclarationOnly; + S.Diag(D->getLocation(), + IsDecl ? 
diag::note_previous_decl : diag::note_defined_here) << D; } continue; @@ -9087,10 +9138,11 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (!ASE && !OASE && VD) { VarDecl *VDDef = VD->getDefinition(); if (VD->getType()->isReferenceType() && VDDef && VDDef->hasInit()) { - DSARefChecker Check(DSAStack); + DSARefChecker Check(Stack); if (Check.Visit(VDDef->getInit())) { - Diag(ELoc, diag::err_omp_reduction_ref_type_arg) << ERange; - Diag(VDDef->getLocation(), diag::note_defined_here) << VDDef; + S.Diag(ELoc, diag::err_omp_reduction_ref_type_arg) + << getOpenMPClauseName(ClauseKind) << ERange; + S.Diag(VDDef->getLocation(), diag::note_defined_here) << VDDef; continue; } } @@ -9108,17 +9160,17 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // but a list item can appear only once in the reduction clauses for that // directive. DSAStackTy::DSAVarData DVar; - DVar = DSAStack->getTopDSA(D, false); + DVar = Stack->getTopDSA(D, false); if (DVar.CKind == OMPC_reduction) { - Diag(ELoc, diag::err_omp_once_referenced) - << getOpenMPClauseName(OMPC_reduction); + S.Diag(ELoc, diag::err_omp_once_referenced) + << getOpenMPClauseName(ClauseKind); if (DVar.RefExpr) - Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced); + S.Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced); } else if (DVar.CKind != OMPC_unknown) { - Diag(ELoc, diag::err_omp_wrong_dsa) + S.Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind) << getOpenMPClauseName(OMPC_reduction); - ReportOriginalDSA(*this, DSAStack, D, DVar); + ReportOriginalDSA(S, Stack, D, DVar); continue; } @@ -9126,16 +9178,16 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // A list item that appears in a reduction clause of a worksharing // construct must be shared in the parallel regions to which any of the // worksharing regions arising from the worksharing construct bind. 
- OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective(); + OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective(); if (isOpenMPWorksharingDirective(CurrDir) && !isOpenMPParallelDirective(CurrDir) && !isOpenMPTeamsDirective(CurrDir)) { - DVar = DSAStack->getImplicitDSA(D, true); + DVar = Stack->getImplicitDSA(D, true); if (DVar.CKind != OMPC_shared) { - Diag(ELoc, diag::err_omp_required_access) + S.Diag(ELoc, diag::err_omp_required_access) << getOpenMPClauseName(OMPC_reduction) << getOpenMPClauseName(OMPC_shared); - ReportOriginalDSA(*this, DSAStack, D, DVar); + ReportOriginalDSA(S, Stack, D, DVar); continue; } } @@ -9144,24 +9196,20 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // builtin/overloaded operators. CXXCastPath BasePath; ExprResult DeclareReductionRef = buildDeclareReductionRef( - *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec, + S, ELoc, ERange, Stack->getCurScope(), ReductionIdScopeSpec, ReductionId, Type, BasePath, IR == ER ? nullptr : *IR); if (DeclareReductionRef.isInvalid()) continue; - if (CurContext->isDependentContext() && + if (S.CurContext->isDependentContext() && (DeclareReductionRef.isUnset() || isa(DeclareReductionRef.get()))) { - Vars.push_back(RefExpr); - Privates.push_back(nullptr); - LHSs.push_back(nullptr); - RHSs.push_back(nullptr); - ReductionOps.push_back(DeclareReductionRef.get()); + RD.push(RefExpr, DeclareReductionRef.get()); continue; } if (BOK == BO_Comma && DeclareReductionRef.isUnset()) { // Not allowed reduction identifier is found. 
- Diag(ReductionId.getLocStart(), - diag::err_omp_unknown_reduction_identifier) + S.Diag(ReductionId.getLocStart(), + diag::err_omp_unknown_reduction_identifier) << Type << ReductionIdRange; continue; } @@ -9177,28 +9225,27 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (DeclareReductionRef.isUnset()) { if ((BOK == BO_GT || BOK == BO_LT) && !(Type->isScalarType() || - (getLangOpts().CPlusPlus && Type->isArithmeticType()))) { - Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg) - << getLangOpts().CPlusPlus; + (S.getLangOpts().CPlusPlus && Type->isArithmeticType()))) { + S.Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg) + << getOpenMPClauseName(ClauseKind) << S.getLangOpts().CPlusPlus; if (!ASE && !OASE) { - bool IsDecl = !VD || - VD->isThisDeclarationADefinition(Context) == - VarDecl::DeclarationOnly; - Diag(D->getLocation(), - IsDecl ? diag::note_previous_decl : diag::note_defined_here) + bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + VarDecl::DeclarationOnly; + S.Diag(D->getLocation(), + IsDecl ? diag::note_previous_decl : diag::note_defined_here) << D; } continue; } if ((BOK == BO_OrAssign || BOK == BO_AndAssign || BOK == BO_XorAssign) && - !getLangOpts().CPlusPlus && Type->isFloatingType()) { - Diag(ELoc, diag::err_omp_clause_floating_type_arg); + !S.getLangOpts().CPlusPlus && Type->isFloatingType()) { + S.Diag(ELoc, diag::err_omp_clause_floating_type_arg) + << getOpenMPClauseName(ClauseKind); if (!ASE && !OASE) { - bool IsDecl = !VD || - VD->isThisDeclarationADefinition(Context) == - VarDecl::DeclarationOnly; - Diag(D->getLocation(), - IsDecl ? diag::note_previous_decl : diag::note_defined_here) + bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + VarDecl::DeclarationOnly; + S.Diag(D->getLocation(), + IsDecl ? 
diag::note_previous_decl : diag::note_defined_here) << D; } continue; @@ -9206,9 +9253,9 @@ OMPClause *Sema::ActOnOpenMPReductionClause( } Type = Type.getNonLValueExprType(Context).getUnqualifiedType(); - auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs", + auto *LHSVD = buildVarDecl(S, ELoc, Type, ".reduction.lhs", D->hasAttrs() ? &D->getAttrs() : nullptr); - auto *RHSVD = buildVarDecl(*this, ELoc, Type, D->getName(), + auto *RHSVD = buildVarDecl(S, ELoc, Type, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr); auto PrivateTy = Type; if (OASE || @@ -9220,19 +9267,20 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // For array subscripts or single variables Private Ty is the same as Type // (type of the variable or single array element). PrivateTy = Context.getVariableArrayType( - Type, new (Context) OpaqueValueExpr(SourceLocation(), - Context.getSizeType(), VK_RValue), + Type, + new (Context) OpaqueValueExpr(SourceLocation(), Context.getSizeType(), + VK_RValue), ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange()); } else if (!ASE && !OASE && Context.getAsArrayType(D->getType().getNonReferenceType())) PrivateTy = D->getType().getNonReferenceType(); // Private copy. - auto *PrivateVD = buildVarDecl(*this, ELoc, PrivateTy, D->getName(), + auto *PrivateVD = buildVarDecl(S, ELoc, PrivateTy, D->getName(), D->hasAttrs() ? &D->getAttrs() : nullptr); // Add initializer for private variable. Expr *Init = nullptr; - auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc); - auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc); + auto *LHSDRE = buildDeclRefExpr(S, LHSVD, Type, ELoc); + auto *RHSDRE = buildDeclRefExpr(S, RHSVD, Type, ELoc); if (DeclareReductionRef.isUsable()) { auto *DRDRef = DeclareReductionRef.getAs(); auto *DRD = cast(DRDRef->getDecl()); @@ -9249,13 +9297,13 @@ OMPClause *Sema::ActOnOpenMPReductionClause( case BO_LOr: // '+', '-', '^', '|', '||' reduction ops - initializer is '0'. 
if (Type->isScalarType() || Type->isAnyComplexType()) - Init = ActOnIntegerConstant(ELoc, /*Val=*/0).get(); + Init = S.ActOnIntegerConstant(ELoc, /*Val=*/0).get(); break; case BO_Mul: case BO_LAnd: if (Type->isScalarType() || Type->isAnyComplexType()) { // '*' and '&&' reduction ops - initializer is '1'. - Init = ActOnIntegerConstant(ELoc, /*Val=*/1).get(); + Init = S.ActOnIntegerConstant(ELoc, /*Val=*/1).get(); } break; case BO_And: { @@ -9278,7 +9326,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause( if (Init && OrigType->isAnyComplexType()) { // Init = 0xFFFF + 0xFFFFi; auto *Im = new (Context) ImaginaryLiteral(Init, OrigType); - Init = CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get(); + Init = S.CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get(); } Type = OrigType; break; @@ -9295,15 +9343,14 @@ OMPClause *Sema::ActOnOpenMPReductionClause( QualType IntTy = Context.getIntTypeForBitwidth(Size, /*Signed=*/IsSigned); llvm::APInt InitValue = - (BOK != BO_LT) - ? IsSigned ? llvm::APInt::getSignedMinValue(Size) - : llvm::APInt::getMinValue(Size) - : IsSigned ? llvm::APInt::getSignedMaxValue(Size) - : llvm::APInt::getMaxValue(Size); + (BOK != BO_LT) ? IsSigned ? llvm::APInt::getSignedMinValue(Size) + : llvm::APInt::getMinValue(Size) + : IsSigned ? llvm::APInt::getSignedMaxValue(Size) + : llvm::APInt::getMaxValue(Size); Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc); if (Type->isPointerType()) { // Cast to pointer type. 
- auto CastExpr = BuildCStyleCastExpr( + auto CastExpr = S.BuildCStyleCastExpr( SourceLocation(), Context.getTrivialTypeSourceInfo(Type, ELoc), SourceLocation(), Init); if (CastExpr.isInvalid()) @@ -9344,20 +9391,19 @@ OMPClause *Sema::ActOnOpenMPReductionClause( llvm_unreachable("Unexpected reduction operation"); } } - if (Init && DeclareReductionRef.isUnset()) { - AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false); - } else if (!Init) - ActOnUninitializedDecl(RHSVD); + if (Init && DeclareReductionRef.isUnset()) + S.AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false); + else if (!Init) + S.ActOnUninitializedDecl(RHSVD); if (RHSVD->isInvalidDecl()) continue; if (!RHSVD->hasInit() && DeclareReductionRef.isUnset()) { - Diag(ELoc, diag::err_omp_reduction_id_not_compatible) << Type - << ReductionIdRange; - bool IsDecl = - !VD || - VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly; - Diag(D->getLocation(), - IsDecl ? diag::note_previous_decl : diag::note_defined_here) + S.Diag(ELoc, diag::err_omp_reduction_id_not_compatible) + << Type << ReductionIdRange; + bool IsDecl = !VD || VD->isThisDeclarationADefinition(Context) == + VarDecl::DeclarationOnly; + S.Diag(D->getLocation(), + IsDecl ? diag::note_previous_decl : diag::note_defined_here) << D; continue; } @@ -9365,16 +9411,16 @@ OMPClause *Sema::ActOnOpenMPReductionClause( // codegen. 
PrivateVD->setInit(RHSVD->getInit()); PrivateVD->setInitStyle(RHSVD->getInitStyle()); - auto *PrivateDRE = buildDeclRefExpr(*this, PrivateVD, PrivateTy, ELoc); + auto *PrivateDRE = buildDeclRefExpr(S, PrivateVD, PrivateTy, ELoc); ExprResult ReductionOp; if (DeclareReductionRef.isUsable()) { QualType RedTy = DeclareReductionRef.get()->getType(); QualType PtrRedTy = Context.getPointerType(RedTy); - ExprResult LHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, LHSDRE); - ExprResult RHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RHSDRE); + ExprResult LHS = S.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, LHSDRE); + ExprResult RHS = S.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RHSDRE); if (!BasePath.empty()) { - LHS = DefaultLvalueConversion(LHS.get()); - RHS = DefaultLvalueConversion(RHS.get()); + LHS = S.DefaultLvalueConversion(LHS.get()); + RHS = S.DefaultLvalueConversion(RHS.get()); LHS = ImplicitCastExpr::Create(Context, PtrRedTy, CK_UncheckedDerivedToBase, LHS.get(), &BasePath, LHS.get()->getValueKind()); @@ -9387,27 +9433,27 @@ OMPClause *Sema::ActOnOpenMPReductionClause( QualType FnTy = Context.getFunctionType(Context.VoidTy, Params, EPI); auto *OVE = new (Context) OpaqueValueExpr( ELoc, Context.getPointerType(FnTy), VK_RValue, OK_Ordinary, - DefaultLvalueConversion(DeclareReductionRef.get()).get()); + S.DefaultLvalueConversion(DeclareReductionRef.get()).get()); Expr *Args[] = {LHS.get(), RHS.get()}; ReductionOp = new (Context) CallExpr(Context, OVE, Args, Context.VoidTy, VK_RValue, ELoc); } else { - ReductionOp = BuildBinOp(DSAStack->getCurScope(), - ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE); + ReductionOp = S.BuildBinOp( + Stack->getCurScope(), ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE); if (ReductionOp.isUsable()) { if (BOK != BO_LT && BOK != BO_GT) { ReductionOp = - BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(), - BO_Assign, LHSDRE, ReductionOp.get()); + S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(), + BO_Assign, LHSDRE, ReductionOp.get()); 
} else { auto *ConditionalOp = new (Context) ConditionalOperator( ReductionOp.get(), SourceLocation(), LHSDRE, SourceLocation(), RHSDRE, Type, VK_LValue, OK_Ordinary); ReductionOp = - BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(), - BO_Assign, LHSDRE, ConditionalOp); + S.BuildBinOp(Stack->getCurScope(), ReductionId.getLocStart(), + BO_Assign, LHSDRE, ConditionalOp); } - ReductionOp = ActOnFinishFullExpr(ReductionOp.get()); + ReductionOp = S.ActOnFinishFullExpr(ReductionOp.get()); } if (ReductionOp.isInvalid()) continue; @@ -9415,54 +9461,86 @@ OMPClause *Sema::ActOnOpenMPReductionClause( DeclRefExpr *Ref = nullptr; Expr *VarsExpr = RefExpr->IgnoreParens(); - if (!VD && !CurContext->isDependentContext()) { + if (!VD && !S.CurContext->isDependentContext()) { if (ASE || OASE) { - TransformExprToCaptures RebuildToCapture(*this, D); + TransformExprToCaptures RebuildToCapture(S, D); VarsExpr = RebuildToCapture.TransformExpr(RefExpr->IgnoreParens()).get(); Ref = RebuildToCapture.getCapturedExpr(); } else { - VarsExpr = Ref = - buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false); + VarsExpr = Ref = buildCapture(S, D, SimpleRefExpr, /*WithInit=*/false); } - if (!IsOpenMPCapturedDecl(D)) { - ExprCaptures.push_back(Ref->getDecl()); + if (!S.IsOpenMPCapturedDecl(D)) { + RD.ExprCaptures.emplace_back(Ref->getDecl()); if (Ref->getDecl()->hasAttr()) { - ExprResult RefRes = DefaultLvalueConversion(Ref); + ExprResult RefRes = S.DefaultLvalueConversion(Ref); if (!RefRes.isUsable()) continue; ExprResult PostUpdateRes = - BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, - SimpleRefExpr, RefRes.get()); + S.BuildBinOp(Stack->getCurScope(), ELoc, BO_Assign, SimpleRefExpr, + RefRes.get()); if (!PostUpdateRes.isUsable()) continue; - if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective())) { - Diag(RefExpr->getExprLoc(), - diag::err_omp_reduction_non_addressable_expression) + if (isOpenMPTaskingDirective(Stack->getCurrentDirective()) || + 
Stack->getCurrentDirective() == OMPD_taskgroup) { + S.Diag(RefExpr->getExprLoc(), + diag::err_omp_reduction_non_addressable_expression) << RefExpr->getSourceRange(); continue; } - ExprPostUpdates.push_back( - IgnoredValueConversions(PostUpdateRes.get()).get()); + RD.ExprPostUpdates.emplace_back( + S.IgnoredValueConversions(PostUpdateRes.get()).get()); } } } - DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref); - Vars.push_back(VarsExpr); - Privates.push_back(PrivateDRE); - LHSs.push_back(LHSDRE); - RHSs.push_back(RHSDRE); - ReductionOps.push_back(ReductionOp.get()); + // All reduction items are still marked as reduction (to do not increase + // code base size). + Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref); + RD.push(VarsExpr, PrivateDRE, LHSDRE, RHSDRE, ReductionOp.get()); } + return RD.Vars.empty(); +} - if (Vars.empty()) +OMPClause *Sema::ActOnOpenMPReductionClause( + ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions) { + ReductionData RD(VarList.size()); + + if (ActOnOMPReductionKindClause(*this, DSAStack, OMPC_reduction, VarList, + StartLoc, LParenLoc, ColonLoc, EndLoc, + ReductionIdScopeSpec, ReductionId, + UnresolvedReductions, RD)) return nullptr; return OMPReductionClause::Create( - Context, StartLoc, LParenLoc, ColonLoc, EndLoc, Vars, - ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, Privates, - LHSs, RHSs, ReductionOps, buildPreInits(Context, ExprCaptures), - buildPostUpdate(*this, ExprPostUpdates)); + Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, + ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, + RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, + buildPreInits(Context, RD.ExprCaptures), + buildPostUpdate(*this, RD.ExprPostUpdates)); +} + +OMPClause *Sema::ActOnOpenMPTaskReductionClause( + ArrayRef 
VarList, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions) { + ReductionData RD(VarList.size()); + + if (ActOnOMPReductionKindClause(*this, DSAStack, OMPC_task_reduction, + VarList, StartLoc, LParenLoc, ColonLoc, + EndLoc, ReductionIdScopeSpec, ReductionId, + UnresolvedReductions, RD)) + return nullptr; + + return OMPTaskReductionClause::Create( + Context, StartLoc, LParenLoc, ColonLoc, EndLoc, RD.Vars, + ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, + RD.Privates, RD.LHSs, RD.RHSs, RD.ReductionOps, + buildPreInits(Context, RD.ExprCaptures), + buildPostUpdate(*this, RD.ExprPostUpdates)); } bool Sema::CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp index b19dcb2a5099..598a11300b87 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp @@ -120,6 +120,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr, // Function type attributes. 
#define FUNCTION_TYPE_ATTRS_CASELIST \ + case AttributeList::AT_NSReturnsRetained: \ case AttributeList::AT_NoReturn: \ case AttributeList::AT_Regparm: \ case AttributeList::AT_AnyX86NoCallerSavedRegisters: \ @@ -640,12 +641,6 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state, distributeObjCPointerTypeAttrFromDeclarator(state, *attr, declSpecType); break; - case AttributeList::AT_NSReturnsRetained: - if (!state.getSema().getLangOpts().ObjCAutoRefCount) - break; - // fallthrough - LLVM_FALLTHROUGH; - FUNCTION_TYPE_ATTRS_CASELIST: distributeFunctionTypeAttrFromDeclarator(state, *attr, declSpecType); break; @@ -2385,6 +2380,11 @@ QualType Sema::BuildFunctionType(QualType T, [=](unsigned i) { return Loc; }); } + if (EPI.ExtInfo.getProducesResult()) { + // This is just a warning, so we can't fail to build if we see it. + checkNSReturnsRetainedReturnType(Loc, T); + } + if (Invalid) return QualType(); @@ -5017,6 +5017,8 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { return AttributeList::AT_TypeNullUnspecified; case AttributedType::attr_objc_kindof: return AttributeList::AT_ObjCKindOf; + case AttributedType::attr_ns_returns_retained: + return AttributeList::AT_NSReturnsRetained; } llvm_unreachable("unexpected attribute kind!"); } @@ -6373,17 +6375,26 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, // ns_returns_retained is not always a type attribute, but if we got // here, we're treating it as one right now. if (attr.getKind() == AttributeList::AT_NSReturnsRetained) { - assert(S.getLangOpts().ObjCAutoRefCount && - "ns_returns_retained treated as type attribute in non-ARC"); if (attr.getNumArgs()) return true; // Delay if this is not a function type. 
if (!unwrapped.isFunctionType()) return false; - FunctionType::ExtInfo EI - = unwrapped.get()->getExtInfo().withProducesResult(true); - type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); + // Check whether the return type is reasonable. + if (S.checkNSReturnsRetainedReturnType(attr.getLoc(), + unwrapped.get()->getReturnType())) + return true; + + // Only actually change the underlying type in ARC builds. + QualType origType = type; + if (state.getSema().getLangOpts().ObjCAutoRefCount) { + FunctionType::ExtInfo EI + = unwrapped.get()->getExtInfo().withProducesResult(true); + type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); + } + type = S.Context.getAttributedType(AttributedType::attr_ns_returns_retained, + origType, type); return true; } @@ -6945,12 +6956,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, attr.setUsedAsTypeAttr(); break; - case AttributeList::AT_NSReturnsRetained: - if (!state.getSema().getLangOpts().ObjCAutoRefCount) - break; - // fallthrough into the function attrs - LLVM_FALLTHROUGH; - FUNCTION_TYPE_ATTRS_CASELIST: attr.setUsedAsTypeAttr(); diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index 7aa8f64d5081..91da9f88c59b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -1651,6 +1651,21 @@ class TreeTransform { ReductionId, UnresolvedReductions); } + /// Build a new OpenMP 'task_reduction' clause. + /// + /// By default, performs semantic analysis to build the new statement. + /// Subclasses may override this routine to provide different behavior. 
+ OMPClause *RebuildOMPTaskReductionClause( + ArrayRef VarList, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, + CXXScopeSpec &ReductionIdScopeSpec, + const DeclarationNameInfo &ReductionId, + ArrayRef UnresolvedReductions) { + return getSema().ActOnOpenMPTaskReductionClause( + VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec, + ReductionId, UnresolvedReductions); + } + /// \brief Build a new OpenMP 'linear' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -8399,6 +8414,51 @@ TreeTransform::TransformOMPReductionClause(OMPReductionClause *C) { C->getLocEnd(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions); } +template +OMPClause *TreeTransform::TransformOMPTaskReductionClause( + OMPTaskReductionClause *C) { + llvm::SmallVector Vars; + Vars.reserve(C->varlist_size()); + for (auto *VE : C->varlists()) { + ExprResult EVar = getDerived().TransformExpr(cast(VE)); + if (EVar.isInvalid()) + return nullptr; + Vars.push_back(EVar.get()); + } + CXXScopeSpec ReductionIdScopeSpec; + ReductionIdScopeSpec.Adopt(C->getQualifierLoc()); + + DeclarationNameInfo NameInfo = C->getNameInfo(); + if (NameInfo.getName()) { + NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo); + if (!NameInfo.getName()) + return nullptr; + } + // Build a list of all UDR decls with the same names ranged by the Scopes. + // The Scope boundary is a duplication of the previous decl. + llvm::SmallVector UnresolvedReductions; + for (auto *E : C->reduction_ops()) { + // Transform all the decls. 
+ if (E) { + auto *ULE = cast(E); + UnresolvedSet<8> Decls; + for (auto *D : ULE->decls()) { + NamedDecl *InstD = + cast(getDerived().TransformDecl(E->getExprLoc(), D)); + Decls.addDecl(InstD, InstD->getAccess()); + } + UnresolvedReductions.push_back(UnresolvedLookupExpr::Create( + SemaRef.Context, /*NamingClass=*/nullptr, + ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo, + /*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end())); + } else + UnresolvedReductions.push_back(nullptr); + } + return getDerived().RebuildOMPTaskReductionClause( + Vars, C->getLocStart(), C->getLParenLoc(), C->getColonLoc(), + C->getLocEnd(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions); +} + template OMPClause * TreeTransform::TransformOMPLinearClause(OMPLinearClause *C) { diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp index 678ecfc9a3d9..50be74f6bf6e 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp @@ -1520,7 +1520,7 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { Stream.JumpToBit(Offset); RecordData Record; - SmallVector MacroArgs; + SmallVector MacroParams; MacroInfo *Macro = nullptr; while (true) { @@ -1571,17 +1571,17 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { bool isC99VarArgs = Record[NextIndex++]; bool isGNUVarArgs = Record[NextIndex++]; bool hasCommaPasting = Record[NextIndex++]; - MacroArgs.clear(); + MacroParams.clear(); unsigned NumArgs = Record[NextIndex++]; for (unsigned i = 0; i != NumArgs; ++i) - MacroArgs.push_back(getLocalIdentifier(F, Record[NextIndex++])); + MacroParams.push_back(getLocalIdentifier(F, Record[NextIndex++])); // Install function-like macro info. 
MI->setIsFunctionLike(); if (isC99VarArgs) MI->setIsC99Varargs(); if (isGNUVarArgs) MI->setIsGNUVarargs(); if (hasCommaPasting) MI->setHasCommaPasting(); - MI->setArgumentList(MacroArgs, PP.getPreprocessorAllocator()); + MI->setParameterList(MacroParams, PP.getPreprocessorAllocator()); } // Remember that we saw this macro last so that we add the tokens that @@ -9341,6 +9341,8 @@ void ASTReader::diagnoseOdrViolations() { case Decl::Field: return Field; case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: return CXXMethod; case Decl::TypeAlias: return TypeAlias; @@ -9669,17 +9671,30 @@ void ASTReader::diagnoseOdrViolations() { break; } case CXXMethod: { + enum { + DiagMethod, + DiagConstructor, + DiagDestructor, + } FirstMethodType, + SecondMethodType; + auto GetMethodTypeForDiagnostics = [](const CXXMethodDecl* D) { + if (isa(D)) return DiagConstructor; + if (isa(D)) return DiagDestructor; + return DiagMethod; + }; const CXXMethodDecl *FirstMethod = cast(FirstDecl); const CXXMethodDecl *SecondMethod = cast(SecondDecl); + FirstMethodType = GetMethodTypeForDiagnostics(FirstMethod); + SecondMethodType = GetMethodTypeForDiagnostics(SecondMethod); auto FirstName = FirstMethod->getDeclName(); auto SecondName = SecondMethod->getDeclName(); - if (FirstName != SecondName) { + if (FirstMethodType != SecondMethodType || FirstName != SecondName) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodName) - << FirstName; + << FirstMethodType << FirstName; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodName) - << SecondName; + << SecondMethodType << SecondName; Diagnosed = true; break; @@ -9690,11 +9705,11 @@ void ASTReader::diagnoseOdrViolations() { if (FirstDeleted != SecondDeleted) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodDeleted) - << FirstName << FirstDeleted; + << FirstMethodType << FirstName << FirstDeleted; ODRDiagNote(SecondMethod->getLocation(), 
SecondMethod->getSourceRange(), MethodDeleted) - << SecondName << SecondDeleted; + << SecondMethodType << SecondName << SecondDeleted; Diagnosed = true; break; } @@ -9707,10 +9722,10 @@ void ASTReader::diagnoseOdrViolations() { (FirstVirtual != SecondVirtual || FirstPure != SecondPure)) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodVirtual) - << FirstName << FirstPure << FirstVirtual; + << FirstMethodType << FirstName << FirstPure << FirstVirtual; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodVirtual) - << SecondName << SecondPure << SecondVirtual; + << SecondMethodType << SecondName << SecondPure << SecondVirtual; Diagnosed = true; break; } @@ -9725,10 +9740,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstStatic != SecondStatic) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodStatic) - << FirstName << FirstStatic; + << FirstMethodType << FirstName << FirstStatic; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodStatic) - << SecondName << SecondStatic; + << SecondMethodType << SecondName << SecondStatic; Diagnosed = true; break; } @@ -9738,10 +9753,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstVolatile != SecondVolatile) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodVolatile) - << FirstName << FirstVolatile; + << FirstMethodType << FirstName << FirstVolatile; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodVolatile) - << SecondName << SecondVolatile; + << SecondMethodType << SecondName << SecondVolatile; Diagnosed = true; break; } @@ -9751,10 +9766,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstConst != SecondConst) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodConst) - << FirstName << FirstConst; + << FirstMethodType << FirstName << FirstConst; ODRDiagNote(SecondMethod->getLocation(), 
SecondMethod->getSourceRange(), MethodConst) - << SecondName << SecondConst; + << SecondMethodType << SecondName << SecondConst; Diagnosed = true; break; } @@ -9764,10 +9779,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstInline != SecondInline) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodInline) - << FirstName << FirstInline; + << FirstMethodType << FirstName << FirstInline; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodInline) - << SecondName << SecondInline; + << SecondMethodType << SecondName << SecondInline; Diagnosed = true; break; } @@ -9777,10 +9792,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstNumParameters != SecondNumParameters) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodNumberParameters) - << FirstName << FirstNumParameters; + << FirstMethodType << FirstName << FirstNumParameters; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodNumberParameters) - << SecondName << SecondNumParameters; + << SecondMethodType << SecondName << SecondNumParameters; Diagnosed = true; break; } @@ -9800,24 +9815,27 @@ void ASTReader::diagnoseOdrViolations() { FirstParamType->getAs()) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodParameterType) - << FirstName << (I + 1) << FirstParamType << true - << ParamDecayedType->getOriginalType(); + << FirstMethodType << FirstName << (I + 1) << FirstParamType + << true << ParamDecayedType->getOriginalType(); } else { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodParameterType) - << FirstName << (I + 1) << FirstParamType << false; + << FirstMethodType << FirstName << (I + 1) << FirstParamType + << false; } if (const DecayedType *ParamDecayedType = SecondParamType->getAs()) { ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodParameterType) - << SecondName << (I + 1) << SecondParamType << true + 
<< SecondMethodType << SecondName << (I + 1) + << SecondParamType << true << ParamDecayedType->getOriginalType(); } else { ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodParameterType) - << SecondName << (I + 1) << SecondParamType << false; + << SecondMethodType << SecondName << (I + 1) + << SecondParamType << false; } ParameterMismatch = true; break; @@ -9828,10 +9846,10 @@ void ASTReader::diagnoseOdrViolations() { if (FirstParamName != SecondParamName) { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodParameterName) - << FirstName << (I + 1) << FirstParamName; + << FirstMethodType << FirstName << (I + 1) << FirstParamName; ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodParameterName) - << SecondName << (I + 1) << SecondParamName; + << SecondMethodType << SecondName << (I + 1) << SecondParamName; ParameterMismatch = true; break; } @@ -9842,12 +9860,14 @@ void ASTReader::diagnoseOdrViolations() { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodParameterSingleDefaultArgument) - << FirstName << (I + 1) << (FirstInit == nullptr) + << FirstMethodType << FirstName << (I + 1) + << (FirstInit == nullptr) << (FirstInit ? FirstInit->getSourceRange() : SourceRange()); ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodParameterSingleDefaultArgument) - << SecondName << (I + 1) << (SecondInit == nullptr) + << SecondMethodType << SecondName << (I + 1) + << (SecondInit == nullptr) << (SecondInit ? 
SecondInit->getSourceRange() : SourceRange()); ParameterMismatch = true; break; @@ -9858,11 +9878,13 @@ void ASTReader::diagnoseOdrViolations() { ODRDiagError(FirstMethod->getLocation(), FirstMethod->getSourceRange(), MethodParameterDifferentDefaultArgument) - << FirstName << (I + 1) << FirstInit->getSourceRange(); + << FirstMethodType << FirstName << (I + 1) + << FirstInit->getSourceRange(); ODRDiagNote(SecondMethod->getLocation(), SecondMethod->getSourceRange(), MethodParameterDifferentDefaultArgument) - << SecondName << (I + 1) << SecondInit->getSourceRange(); + << SecondMethodType << SecondName << (I + 1) + << SecondInit->getSourceRange(); ParameterMismatch = true; break; diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index afee50ffa3b9..21adcddd3a4a 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1834,6 +1834,9 @@ OMPClause *OMPClauseReader::readClause() { case OMPC_reduction: C = OMPReductionClause::CreateEmpty(Context, Reader->Record.readInt()); break; + case OMPC_task_reduction: + C = OMPTaskReductionClause::CreateEmpty(Context, Reader->Record.readInt()); + break; case OMPC_linear: C = OMPLinearClause::CreateEmpty(Context, Reader->Record.readInt()); break; @@ -2138,6 +2141,40 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) { C->setReductionOps(Vars); } +void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { + VisitOMPClauseWithPostUpdate(C); + C->setLParenLoc(Reader->ReadSourceLocation()); + C->setColonLoc(Reader->ReadSourceLocation()); + NestedNameSpecifierLoc NNSL = Reader->Record.readNestedNameSpecifierLoc(); + DeclarationNameInfo DNI; + Reader->ReadDeclarationNameInfo(DNI); + C->setQualifierLoc(NNSL); + C->setNameInfo(DNI); + + unsigned NumVars = C->varlist_size(); + SmallVector Vars; + Vars.reserve(NumVars); + for 
(unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setVarRefs(Vars); + Vars.clear(); + for (unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setPrivates(Vars); + Vars.clear(); + for (unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setLHSExprs(Vars); + Vars.clear(); + for (unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setRHSExprs(Vars); + Vars.clear(); + for (unsigned I = 0; I != NumVars; ++I) + Vars.push_back(Reader->Record.readSubExpr()); + C->setReductionOps(Vars); +} + void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) { VisitOMPClauseWithPostUpdate(C); C->setLParenLoc(Reader->ReadSourceLocation()); @@ -2709,6 +2746,8 @@ void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { void ASTStmtReader::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); + // The NumClauses field was read in ReadStmtFromStream. 
+ Record.skipInts(1); VisitOMPExecutableDirective(D); } @@ -3479,7 +3518,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case STMT_OMP_TASKGROUP_DIRECTIVE: - S = OMPTaskgroupDirective::CreateEmpty(Context, Empty); + S = OMPTaskgroupDirective::CreateEmpty( + Context, Record[ASTStmtReader::NumStmtFields], Empty); break; case STMT_OMP_FLUSH_DIRECTIVE: diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp index f7a49e41009d..a875e627bdfb 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp @@ -2521,9 +2521,9 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Record.push_back(MI->isC99Varargs()); Record.push_back(MI->isGNUVarargs()); Record.push_back(MI->hasCommaPasting()); - Record.push_back(MI->getNumArgs()); - for (const IdentifierInfo *Arg : MI->args()) - AddIdentifierRef(Arg, Record); + Record.push_back(MI->getNumParams()); + for (const IdentifierInfo *Param : MI->params()) + AddIdentifierRef(Param, Record); } // If we have a detailed preprocessing record, record the macro definition diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp index 90a732e575e2..ae2e0b88c311 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1963,6 +1963,25 @@ void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) { Record.AddStmt(E); } +void OMPClauseWriter::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) { + Record.push_back(C->varlist_size()); + VisitOMPClauseWithPostUpdate(C); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getColonLoc()); + Record.AddNestedNameSpecifierLoc(C->getQualifierLoc()); + Record.AddDeclarationNameInfo(C->getNameInfo()); + for (auto *VE : 
C->varlists()) + Record.AddStmt(VE); + for (auto *VE : C->privates()) + Record.AddStmt(VE); + for (auto *E : C->lhs_exprs()) + Record.AddStmt(E); + for (auto *E : C->rhs_exprs()) + Record.AddStmt(E); + for (auto *E : C->reduction_ops()) + Record.AddStmt(E); +} + void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) { Record.push_back(C->varlist_size()); VisitOMPClauseWithPostUpdate(C); @@ -2440,6 +2459,7 @@ void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { void ASTStmtWriter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); + Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); Code = serialization::STMT_OMP_TASKGROUP_DIRECTIVE; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp index 6bbaaac05e6b..655ce33390c9 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp @@ -57,7 +57,7 @@ struct LocalizedState { }; class NonLocalizedStringChecker - : public Checker> { @@ -79,9 +79,10 @@ class NonLocalizedStringChecker void setNonLocalizedState(SVal S, CheckerContext &C) const; void setLocalizedState(SVal S, CheckerContext &C) const; - bool isAnnotatedAsLocalized(const Decl *D) const; - void reportLocalizationError(SVal S, const ObjCMethodCall &M, - CheckerContext &C, int argumentNumber = 0) const; + bool isAnnotatedAsReturningLocalized(const Decl *D) const; + bool isAnnotatedAsTakingLocalized(const Decl *D) const; + void reportLocalizationError(SVal S, const CallEvent &M, CheckerContext &C, + int argumentNumber = 0) const; int getLocalizedArgumentForSelector(const IdentifierInfo *Receiver, Selector S) const; @@ -97,6 +98,7 @@ class NonLocalizedStringChecker void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const; void checkPostObjCMessage(const 
ObjCMethodCall &msg, CheckerContext &C) const; void checkPostStmt(const ObjCStringLiteral *SL, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; }; @@ -644,7 +646,8 @@ void NonLocalizedStringChecker::initLocStringsMethods(ASTContext &Ctx) const { /// Checks to see if the method / function declaration includes /// __attribute__((annotate("returns_localized_nsstring"))) -bool NonLocalizedStringChecker::isAnnotatedAsLocalized(const Decl *D) const { +bool NonLocalizedStringChecker::isAnnotatedAsReturningLocalized( + const Decl *D) const { if (!D) return false; return std::any_of( @@ -654,6 +657,19 @@ bool NonLocalizedStringChecker::isAnnotatedAsLocalized(const Decl *D) const { }); } +/// Checks to see if the method / function declaration includes +/// __attribute__((annotate("takes_localized_nsstring"))) +bool NonLocalizedStringChecker::isAnnotatedAsTakingLocalized( + const Decl *D) const { + if (!D) + return false; + return std::any_of( + D->specific_attr_begin(), + D->specific_attr_end(), [](const AnnotateAttr *Ann) { + return Ann->getAnnotation() == "takes_localized_nsstring"; + }); +} + /// Returns true if the given SVal is marked as Localized in the program state bool NonLocalizedStringChecker::hasLocalizedState(SVal S, CheckerContext &C) const { @@ -733,8 +749,7 @@ static bool isDebuggingContext(CheckerContext &C) { /// Reports a localization error for the passed in method call and SVal void NonLocalizedStringChecker::reportLocalizationError( - SVal S, const ObjCMethodCall &M, CheckerContext &C, - int argumentNumber) const { + SVal S, const CallEvent &M, CheckerContext &C, int argumentNumber) const { // Don't warn about localization errors in classes and methods that // may be debug code. 
@@ -832,7 +847,21 @@ void NonLocalizedStringChecker::checkPreObjCMessage(const ObjCMethodCall &msg, } } - if (argumentNumber < 0) // There was no match in UIMethods + if (argumentNumber < 0) { // There was no match in UIMethods + if (const Decl *D = msg.getDecl()) { + if (const ObjCMethodDecl *OMD = dyn_cast_or_null(D)) { + auto formals = OMD->parameters(); + for (unsigned i = 0, ei = formals.size(); i != ei; ++i) { + if (isAnnotatedAsTakingLocalized(formals[i])) { + argumentNumber = i; + break; + } + } + } + } + } + + if (argumentNumber < 0) // Still no match return; SVal svTitle = msg.getArgSVal(argumentNumber); @@ -855,6 +884,25 @@ void NonLocalizedStringChecker::checkPreObjCMessage(const ObjCMethodCall &msg, } } +void NonLocalizedStringChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + const Decl *D = Call.getDecl(); + if (D && isa(D)) { + const FunctionDecl *FD = dyn_cast(D); + auto formals = FD->parameters(); + for (unsigned i = 0, + ei = std::min(unsigned(formals.size()), Call.getNumArgs()); + i != ei; ++i) { + if (isAnnotatedAsTakingLocalized(formals[i])) { + auto actual = Call.getArgSVal(i); + if (hasNonLocalizedState(actual, C)) { + reportLocalizationError(actual, Call, C, i + 1); + } + } + } + } +} + static inline bool isNSStringType(QualType T, ASTContext &Ctx) { const ObjCObjectPointerType *PT = T->getAs(); @@ -906,7 +954,7 @@ void NonLocalizedStringChecker::checkPostCall(const CallEvent &Call, const IdentifierInfo *Identifier = Call.getCalleeIdentifier(); SVal sv = Call.getReturnValue(); - if (isAnnotatedAsLocalized(D) || LSF.count(Identifier) != 0) { + if (isAnnotatedAsReturningLocalized(D) || LSF.count(Identifier) != 0) { setLocalizedState(sv, C); } else if (isNSStringType(RT, C.getASTContext()) && !hasLocalizedState(sv, C)) { @@ -940,7 +988,8 @@ void NonLocalizedStringChecker::checkPostObjCMessage(const ObjCMethodCall &msg, std::pair MethodDescription = {odInfo, S}; - if (LSM.count(MethodDescription) || 
isAnnotatedAsLocalized(msg.getDecl())) { + if (LSM.count(MethodDescription) || + isAnnotatedAsReturningLocalized(msg.getDecl())) { SVal sv = msg.getReturnValue(); setLocalizedState(sv, C); } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp index 89b1291c4f46..21ccf21515b3 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp @@ -1304,6 +1304,21 @@ RetainSummaryManager::getCFSummaryGetRule(const FunctionDecl *FD) { DoNothing, DoNothing); } +/// Returns true if the declaration 'D' is annotated with 'rcAnnotation'. +static bool hasRCAnnotation(const Decl *D, StringRef rcAnnotation) { + for (const auto *Ann : D->specific_attrs()) { + if (Ann->getAnnotation() == rcAnnotation) + return true; + } + return false; +} + +/// Returns true if the function declaration 'FD' contains +/// 'rc_ownership_trusted_implementation' annotate attribute. +static bool isTrustedReferenceCountImplementation(const FunctionDecl *FD) { + return hasRCAnnotation(FD, "rc_ownership_trusted_implementation"); +} + //===----------------------------------------------------------------------===// // Summary creation for Selectors. //===----------------------------------------------------------------------===// @@ -3380,6 +3395,9 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { // See if it's one of the specific functions we know how to eval. bool canEval = false; + // See if the function has 'rc_ownership_trusted_implementation' + // annotate attribute. If it does, we will not inline it. 
+ bool hasTrustedImplementationAnnotation = false; QualType ResultTy = CE->getCallReturnType(C.getASTContext()); if (ResultTy->isObjCIdType()) { @@ -3395,6 +3413,11 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { cocoa::isRefType(ResultTy, "CV", FName)) { canEval = isRetain(FD, FName) || isAutorelease(FD, FName) || isMakeCollectable(FD, FName); + } else { + if (FD->getDefinition()) { + canEval = isTrustedReferenceCountImplementation(FD->getDefinition()); + hasTrustedImplementationAnnotation = canEval; + } } } @@ -3404,8 +3427,11 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { // Bind the return value. const LocationContext *LCtx = C.getLocationContext(); SVal RetVal = state->getSVal(CE->getArg(0), LCtx); - if (RetVal.isUnknown()) { - // If the receiver is unknown, conjure a return value. + if (RetVal.isUnknown() || + (hasTrustedImplementationAnnotation && !ResultTy.isNull())) { + // If the receiver is unknown or the function has + // 'rc_ownership_trusted_implementation' annotate attribute, conjure a + // return value. SValBuilder &SVB = C.getSValBuilder(); RetVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, ResultTy, C.blockCount()); } @@ -3421,8 +3447,9 @@ bool RetainCountChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { Binding = getRefBinding(state, Sym); // Invalidate the argument region. - state = state->invalidateRegions(ArgRegion, CE, C.blockCount(), LCtx, - /*CausesPointerEscape*/ false); + state = state->invalidateRegions( + ArgRegion, CE, C.blockCount(), LCtx, + /*CausesPointerEscape*/ hasTrustedImplementationAnnotation); // Restore the refcount status of the argument. 
if (Binding) diff --git a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp b/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp index f36387dafdbf..3bfb5bbe35e4 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp @@ -18,6 +18,7 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Lexer.h" +#include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -25,132 +26,38 @@ using namespace llvm; namespace clang { namespace tooling { -// NamedDeclFindingASTVisitor recursively visits each AST node to find the -// symbol underneath the cursor. -// FIXME: move to separate .h/.cc file if this gets too large. namespace { -class NamedDeclFindingASTVisitor - : public clang::RecursiveASTVisitor { + +/// Recursively visits each AST node to find the symbol underneath the cursor. +class NamedDeclOccurrenceFindingVisitor + : public RecursiveSymbolVisitor { public: // \brief Finds the NamedDecl at a point in the source. // \param Point the location in the source to search for the NamedDecl. - explicit NamedDeclFindingASTVisitor(const SourceLocation Point, - const ASTContext &Context) - : Result(nullptr), Point(Point), Context(Context) {} + explicit NamedDeclOccurrenceFindingVisitor(const SourceLocation Point, + const ASTContext &Context) + : RecursiveSymbolVisitor(Context.getSourceManager(), + Context.getLangOpts()), + Point(Point), Context(Context) {} - // \brief Finds the NamedDecl for a name in the source. - // \param Name the fully qualified name. - explicit NamedDeclFindingASTVisitor(const std::string &Name, - const ASTContext &Context) - : Result(nullptr), Name(Name), Context(Context) {} - - // Declaration visitors: - - // \brief Checks if the point falls within the NameDecl. 
This covers every - // declaration of a named entity that we may come across. Usually, just - // checking if the point lies within the length of the name of the declaration - // and the start location is sufficient. - bool VisitNamedDecl(const NamedDecl *Decl) { - return dyn_cast(Decl) - ? true - : setResult(Decl, Decl->getLocation(), - Decl->getNameAsString().length()); - } - - // Expression visitors: - - bool VisitDeclRefExpr(const DeclRefExpr *Expr) { - const NamedDecl *Decl = Expr->getFoundDecl(); - return setResult(Decl, Expr->getLocation(), - Decl->getNameAsString().length()); - } - - bool VisitMemberExpr(const MemberExpr *Expr) { - const NamedDecl *Decl = Expr->getFoundDecl().getDecl(); - return setResult(Decl, Expr->getMemberLoc(), - Decl->getNameAsString().length()); - } - - // Other visitors: - - bool VisitTypeLoc(const TypeLoc Loc) { - const SourceLocation TypeBeginLoc = Loc.getBeginLoc(); - const SourceLocation TypeEndLoc = Lexer::getLocForEndOfToken( - TypeBeginLoc, 0, Context.getSourceManager(), Context.getLangOpts()); - if (const auto *TemplateTypeParm = - dyn_cast(Loc.getType())) - return setResult(TemplateTypeParm->getDecl(), TypeBeginLoc, TypeEndLoc); - if (const auto *TemplateSpecType = - dyn_cast(Loc.getType())) { - return setResult(TemplateSpecType->getTemplateName().getAsTemplateDecl(), - TypeBeginLoc, TypeEndLoc); - } - return setResult(Loc.getType()->getAsCXXRecordDecl(), TypeBeginLoc, - TypeEndLoc); - } - - bool VisitCXXConstructorDecl(clang::CXXConstructorDecl *ConstructorDecl) { - for (const auto *Initializer : ConstructorDecl->inits()) { - // Ignore implicit initializers. 
- if (!Initializer->isWritten()) - continue; - if (const clang::FieldDecl *FieldDecl = Initializer->getMember()) { - const SourceLocation InitBeginLoc = Initializer->getSourceLocation(), - InitEndLoc = Lexer::getLocForEndOfToken( - InitBeginLoc, 0, Context.getSourceManager(), - Context.getLangOpts()); - if (!setResult(FieldDecl, InitBeginLoc, InitEndLoc)) - return false; - } - } - return true; - } - - // Other: - - const NamedDecl *getNamedDecl() { return Result; } - - // \brief Determines if a namespace qualifier contains the point. - // \returns false on success and sets Result. - void handleNestedNameSpecifierLoc(NestedNameSpecifierLoc NameLoc) { - while (NameLoc) { - const NamespaceDecl *Decl = - NameLoc.getNestedNameSpecifier()->getAsNamespace(); - setResult(Decl, NameLoc.getLocalBeginLoc(), NameLoc.getLocalEndLoc()); - NameLoc = NameLoc.getPrefix(); - } - } - -private: - // \brief Sets Result to Decl if the Point is within Start and End. - // \returns false on success. - bool setResult(const NamedDecl *Decl, SourceLocation Start, - SourceLocation End) { - if (!Decl) + bool visitSymbolOccurrence(const NamedDecl *ND, + ArrayRef NameRanges) { + if (!ND) return true; - if (Name.empty()) { - // Offset is used to find the declaration. + for (const auto &Range : NameRanges) { + SourceLocation Start = Range.getBegin(); + SourceLocation End = Range.getEnd(); if (!Start.isValid() || !Start.isFileID() || !End.isValid() || !End.isFileID() || !isPointWithin(Start, End)) return true; - } else { - // Fully qualified name is used to find the declaration. - if (Name != Decl->getQualifiedNameAsString() && - Name != "::" + Decl->getQualifiedNameAsString()) - return true; } - Result = Decl; + Result = ND; return false; } - // \brief Sets Result to Decl if Point is within Loc and Loc + Offset. - // \returns false on success. - bool setResult(const NamedDecl *Decl, SourceLocation Loc, unsigned Offset) { - // FIXME: Add test for Offset == 0. Add test for Offset - 1 (vs -2 etc). 
- return Offset == 0 || - setResult(Decl, Loc, Loc.getLocWithOffset(Offset - 1)); - } + const NamedDecl *getNamedDecl() const { return Result; } +private: // \brief Determines if the Point is within Start and End. bool isPointWithin(const SourceLocation Start, const SourceLocation End) { // FIXME: Add tests for Point == End. @@ -160,17 +67,17 @@ class NamedDeclFindingASTVisitor Context.getSourceManager().isBeforeInTranslationUnit(Point, End)); } - const NamedDecl *Result; + const NamedDecl *Result = nullptr; const SourceLocation Point; // The location to find the NamedDecl. - const std::string Name; const ASTContext &Context; }; -} // namespace + +} // end anonymous namespace const NamedDecl *getNamedDeclAt(const ASTContext &Context, const SourceLocation Point) { const SourceManager &SM = Context.getSourceManager(); - NamedDeclFindingASTVisitor Visitor(Point, Context); + NamedDeclOccurrenceFindingVisitor Visitor(Point, Context); // Try to be clever about pruning down the number of top-level declarations we // see. If both start and end is either before or after the point we're @@ -184,18 +91,44 @@ const NamedDecl *getNamedDeclAt(const ASTContext &Context, Visitor.TraverseDecl(CurrDecl); } - NestedNameSpecifierLocFinder Finder(const_cast(Context)); - for (const auto &Location : Finder.getNestedNameSpecifierLocations()) - Visitor.handleNestedNameSpecifierLoc(Location); - return Visitor.getNamedDecl(); } +namespace { + +/// Recursively visits each NamedDecl node to find the declaration with a +/// specific name. +class NamedDeclFindingVisitor + : public RecursiveASTVisitor { +public: + explicit NamedDeclFindingVisitor(StringRef Name) : Name(Name) {} + + // We don't have to traverse the uses to find some declaration with a + // specific name, so just visit the named declarations. + bool VisitNamedDecl(const NamedDecl *ND) { + if (!ND) + return true; + // Fully qualified name is used to find the declaration. 
+ if (Name != ND->getQualifiedNameAsString() && + Name != "::" + ND->getQualifiedNameAsString()) + return true; + Result = ND; + return false; + } + + const NamedDecl *getNamedDecl() const { return Result; } + +private: + const NamedDecl *Result = nullptr; + StringRef Name; +}; + +} // end anonymous namespace + const NamedDecl *getNamedDeclFor(const ASTContext &Context, const std::string &Name) { - NamedDeclFindingASTVisitor Visitor(Name, Context); + NamedDeclFindingVisitor Visitor(Name); Visitor.TraverseDecl(Context.getTranslationUnitDecl()); - return Visitor.getNamedDecl(); } diff --git a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp index 934507fe6eae..dc21a94610cb 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Core/Lookup.h" +#include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h" #include "clang/Tooling/Refactoring/Rename/USRFinder.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" @@ -40,70 +41,27 @@ namespace { // \brief This visitor recursively searches for all instances of a USR in a // translation unit and stores them for later usage. 
class USRLocFindingASTVisitor - : public clang::RecursiveASTVisitor { + : public RecursiveSymbolVisitor { public: explicit USRLocFindingASTVisitor(const std::vector &USRs, StringRef PrevName, const ASTContext &Context) - : USRSet(USRs.begin(), USRs.end()), PrevName(PrevName), Context(Context) { + : RecursiveSymbolVisitor(Context.getSourceManager(), + Context.getLangOpts()), + USRSet(USRs.begin(), USRs.end()), PrevName(PrevName), Context(Context) { } - // Declaration visitors: - - bool VisitCXXConstructorDecl(clang::CXXConstructorDecl *ConstructorDecl) { - for (const auto *Initializer : ConstructorDecl->inits()) { - // Ignore implicit initializers. - if (!Initializer->isWritten()) - continue; - if (const clang::FieldDecl *FieldDecl = Initializer->getMember()) { - if (USRSet.find(getUSRForDecl(FieldDecl)) != USRSet.end()) - LocationsFound.push_back(Initializer->getSourceLocation()); - } - } - return true; - } - - bool VisitNamedDecl(const NamedDecl *Decl) { - if (USRSet.find(getUSRForDecl(Decl)) != USRSet.end()) - checkAndAddLocation(Decl->getLocation()); - return true; - } - - // Expression visitors: - - bool VisitDeclRefExpr(const DeclRefExpr *Expr) { - const NamedDecl *Decl = Expr->getFoundDecl(); - - if (USRSet.find(getUSRForDecl(Decl)) != USRSet.end()) { - const SourceManager &Manager = Decl->getASTContext().getSourceManager(); - SourceLocation Location = Manager.getSpellingLoc(Expr->getLocation()); - checkAndAddLocation(Location); - } - - return true; - } - - bool VisitMemberExpr(const MemberExpr *Expr) { - const NamedDecl *Decl = Expr->getFoundDecl().getDecl(); - if (USRSet.find(getUSRForDecl(Decl)) != USRSet.end()) { - const SourceManager &Manager = Decl->getASTContext().getSourceManager(); - SourceLocation Location = Manager.getSpellingLoc(Expr->getMemberLoc()); - checkAndAddLocation(Location); - } - return true; - } - - // Other visitors: - - bool VisitTypeLoc(const TypeLoc Loc) { - if (USRSet.find(getUSRForDecl(Loc.getType()->getAsCXXRecordDecl())) != - 
USRSet.end()) - checkAndAddLocation(Loc.getBeginLoc()); - if (const auto *TemplateTypeParm = - dyn_cast(Loc.getType())) { - if (USRSet.find(getUSRForDecl(TemplateTypeParm->getDecl())) != - USRSet.end()) - checkAndAddLocation(Loc.getBeginLoc()); + bool visitSymbolOccurrence(const NamedDecl *ND, + ArrayRef NameRanges) { + if (USRSet.find(getUSRForDecl(ND)) != USRSet.end()) { + assert(NameRanges.size() == 1 && + "Multiple name pieces are not supported yet!"); + SourceLocation Loc = NameRanges[0].getBegin(); + const SourceManager &SM = Context.getSourceManager(); + // TODO: Deal with macro occurrences correctly. + if (Loc.isMacroID()) + Loc = SM.getSpellingLoc(Loc); + checkAndAddLocation(Loc); } return true; } @@ -116,17 +74,6 @@ class USRLocFindingASTVisitor return LocationsFound; } - // Namespace traversal: - void handleNestedNameSpecifierLoc(NestedNameSpecifierLoc NameLoc) { - while (NameLoc) { - const NamespaceDecl *Decl = - NameLoc.getNestedNameSpecifier()->getAsNamespace(); - if (Decl && USRSet.find(getUSRForDecl(Decl)) != USRSet.end()) - checkAndAddLocation(NameLoc.getLocalBeginLoc()); - NameLoc = NameLoc.getPrefix(); - } - } - private: void checkAndAddLocation(SourceLocation Loc) { const SourceLocation BeginLoc = Loc; @@ -449,11 +396,6 @@ getLocationsOfUSRs(const std::vector &USRs, StringRef PrevName, Decl *Decl) { USRLocFindingASTVisitor Visitor(USRs, PrevName, Decl->getASTContext()); Visitor.TraverseDecl(Decl); - NestedNameSpecifierLocFinder Finder(Decl->getASTContext()); - - for (const auto &Location : Finder.getNestedNameSpecifierLocations()) - Visitor.handleNestedNameSpecifierLoc(Location); - return Visitor.getLocationsFound(); } diff --git a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp index c84fbf473753..662f02dca2a6 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp @@ -336,6 +336,7 @@ ClangTool::ClangTool(const CompilationDatabase 
&Compilations, OverlayFileSystem->pushOverlay(InMemoryFileSystem); appendArgumentsAdjuster(getClangStripOutputAdjuster()); appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster()); + appendArgumentsAdjuster(getClangStripDependencyFileAdjuster()); } ClangTool::~ClangTool() {} diff --git a/contrib/llvm/tools/lld/COFF/Chunks.cpp b/contrib/llvm/tools/lld/COFF/Chunks.cpp index c0996f55f9d1..7d93c28c86c8 100644 --- a/contrib/llvm/tools/lld/COFF/Chunks.cpp +++ b/contrib/llvm/tools/lld/COFF/Chunks.cpp @@ -210,7 +210,15 @@ void SectionChunk::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, A.data(), A.size()); // Apply relocations. + size_t InputSize = getSize(); for (const coff_relocation &Rel : Relocs) { + // Check for an invalid relocation offset. This check isn't perfect, because + // we don't have the relocation size, which is only known after checking the + // machine and relocation type. As a result, a relocation may overwrite the + // beginning of the following input section. + if (Rel.VirtualAddress >= InputSize) + fatal("relocation points beyond the end of its parent section"); + uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; // Get the output section of the symbol for this relocation. The output @@ -227,7 +235,7 @@ void SectionChunk::writeTo(uint8_t *Buf) const { // sections are not GC roots and can end up with these kinds of relocations. // Skip these relocations. if (!OS && !isa(Sym) && !isa(Sym)) { - if (isCodeView()) + if (isCodeView() || isDWARF()) continue; fatal("relocation against symbol in discarded section: " + Sym->getName()); diff --git a/contrib/llvm/tools/lld/COFF/Chunks.h b/contrib/llvm/tools/lld/COFF/Chunks.h index fc3f5d0df4b6..ece5419e255e 100644 --- a/contrib/llvm/tools/lld/COFF/Chunks.h +++ b/contrib/llvm/tools/lld/COFF/Chunks.h @@ -112,7 +112,7 @@ class Chunk { }; // A chunk corresponding a section of an input file. 
-class SectionChunk : public Chunk { +class SectionChunk final : public Chunk { // Identical COMDAT Folding feature accesses section internal data. friend class ICF; @@ -188,6 +188,9 @@ class SectionChunk : public Chunk { return SectionName == ".debug" || SectionName.startswith(".debug$"); } + // True if this is a DWARF debug info chunk. + bool isDWARF() const { return SectionName.startswith(".debug_"); } + // Allow iteration over the bodies of this chunk's relocated symbols. llvm::iterator_range symbols() const { return llvm::make_range(symbol_iterator(File, Relocs.begin()), diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h index 25fdc7abd67b..a58e7d5585f2 100644 --- a/contrib/llvm/tools/lld/COFF/Config.h +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -82,6 +82,7 @@ struct Configuration { SymbolBody *Entry = nullptr; bool NoEntry = false; std::string OutputFile; + std::string ImportName; bool ColorDiagnostics; bool DoGC = true; bool DoICF = true; diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp index 3620297b8b94..35f4a04866c5 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -429,7 +429,32 @@ static std::string getImplibPath() { return Out.str(); } -static void createImportLibrary() { +// +// The import name is caculated as the following: +// +// | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY +// -----+----------------+---------------------+------------------ +// LINK | {value} | {value}.{.dll/.exe} | {output name} +// LIB | {value} | {value}.dll | {output name}.dll +// +static std::string getImportName(bool AsLib) { + SmallString<128> Out; + + if (Config->ImportName.empty()) { + Out.assign(sys::path::filename(Config->OutputFile)); + if (AsLib) + sys::path::replace_extension(Out, ".dll"); + } else { + Out.assign(Config->ImportName); + if (!sys::path::has_extension(Out)) + sys::path::replace_extension(Out, + (Config->DLL || AsLib) ? 
".dll" : ".exe"); + } + + return Out.str(); +} + +static void createImportLibrary(bool AsLib) { std::vector Exports; for (Export &E1 : Config->Exports) { COFFShortExport E2; @@ -444,9 +469,8 @@ static void createImportLibrary() { Exports.push_back(E2); } - std::string DLLName = sys::path::filename(Config->OutputFile); - std::string Path = getImplibPath(); - writeImportLibrary(DLLName, Path, Exports, Config->Machine); + writeImportLibrary(getImportName(AsLib), getImplibPath(), Exports, + Config->Machine); } static void parseModuleDefs(StringRef Path) { @@ -457,6 +481,7 @@ static void parseModuleDefs(StringRef Path) { if (Config->OutputFile.empty()) Config->OutputFile = Saver.save(M.OutputFile); + Config->ImportName = Saver.save(M.ImportName); if (M.ImageBase) Config->ImageBase = M.ImageBase; if (M.StackReserve) @@ -992,7 +1017,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { // Handle generation of import library from a def file. if (!Args.hasArgNoClaim(OPT_INPUT)) { fixupExports(); - createImportLibrary(); + createImportLibrary(/*AsLib=*/true); exit(0); } @@ -1117,7 +1142,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { // need to create a .lib file. 
if (!Config->Exports.empty() || Config->DLL) { fixupExports(); - createImportLibrary(); + createImportLibrary(/*AsLib=*/false); assignExportOrdinals(); } diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp index 508f59e3af1f..89462da93454 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.cpp +++ b/contrib/llvm/tools/lld/COFF/PDB.cpp @@ -14,28 +14,29 @@ #include "SymbolTable.h" #include "Symbols.h" #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" -#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" -#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" +#include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/Object/COFF.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/Endian.h" @@ -53,8 +54,81 @@ 
using llvm::object::coff_section; static ExitOnError ExitOnErr; +namespace { +/// Map from type index and item index in a type server PDB to the +/// corresponding index in the destination PDB. +struct CVIndexMap { + SmallVector TPIMap; + SmallVector IPIMap; + bool IsTypeServerMap = false; +}; + +class PDBLinker { +public: + PDBLinker(SymbolTable *Symtab) + : Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc), + IDTable(Alloc) {} + + /// Emit the basic PDB structure: initial streams, headers, etc. + void initialize(const llvm::codeview::DebugInfo *DI); + + /// Link CodeView from each object file in the symbol table into the PDB. + void addObjectsToPDB(); + + /// Link CodeView from a single object file into the PDB. + void addObjectFile(ObjectFile *File); + + /// Produce a mapping from the type and item indices used in the object + /// file to those in the destination PDB. + /// + /// If the object file uses a type server PDB (compiled with /Zi), merge TPI + /// and IPI from the type server PDB and return a map for it. Each unique type + /// server PDB is merged at most once, so this may return an existing index + /// mapping. + /// + /// If the object does not use a type server PDB (compiled with /Z7), we merge + /// all the type and item records from the .debug$S stream and fill in the + /// caller-provided ObjectIndexMap. + const CVIndexMap &mergeDebugT(ObjectFile *File, CVIndexMap &ObjectIndexMap); + + const CVIndexMap &maybeMergeTypeServerPDB(ObjectFile *File, + TypeServer2Record &TS); + + /// Add the section map and section contributions to the PDB. + void addSections(ArrayRef SectionTable); + + /// Write the PDB to disk. + void commit(); + +private: + BumpPtrAllocator Alloc; + + SymbolTable *Symtab; + + pdb::PDBFileBuilder Builder; + + /// Type records that will go into the PDB TPI stream. + TypeTableBuilder TypeTable; + + /// Item records that will go into the PDB IPI stream. 
+ TypeTableBuilder IDTable; + + /// PDBs use a single global string table for filenames in the file checksum + /// table. + DebugStringTableSubsection PDBStrTab; + + llvm::SmallString<128> NativePath; + + std::vector SectionMap; + + /// Type index mappings of type server PDBs that we've loaded so far. + std::map TypeServerIndexMappings; +}; +} + // Returns a list of all SectionChunks. -static void addSectionContribs(SymbolTable *Symtab, pdb::DbiStreamBuilder &DbiBuilder) { +static void addSectionContribs(SymbolTable *Symtab, + pdb::DbiStreamBuilder &DbiBuilder) { for (Chunk *C : Symtab->getChunks()) if (auto *SC = dyn_cast(C)) DbiBuilder.addSectionContrib(SC->File->ModuleDBI, SC->Header); @@ -96,24 +170,115 @@ static void addTypeInfo(pdb::TpiStreamBuilder &TpiBuilder, }); } -static void mergeDebugT(ObjectFile *File, - TypeTableBuilder &IDTable, - TypeTableBuilder &TypeTable, - SmallVectorImpl &TypeIndexMap, - pdb::PDBTypeServerHandler &Handler) { +static Optional +maybeReadTypeServerRecord(CVTypeArray &Types) { + auto I = Types.begin(); + if (I == Types.end()) + return None; + const CVType &Type = *I; + if (Type.kind() != LF_TYPESERVER2) + return None; + TypeServer2Record TS; + if (auto EC = TypeDeserializer::deserializeAs(const_cast(Type), TS)) + fatal(EC, "error reading type server record"); + return std::move(TS); +} + +const CVIndexMap &PDBLinker::mergeDebugT(ObjectFile *File, + CVIndexMap &ObjectIndexMap) { ArrayRef Data = getDebugSection(File, ".debug$T"); if (Data.empty()) - return; + return ObjectIndexMap; BinaryByteStream Stream(Data, support::little); CVTypeArray Types; BinaryStreamReader Reader(Stream); - Handler.addSearchPath(sys::path::parent_path(File->getName())); if (auto EC = Reader.readArray(Types, Reader.getLength())) fatal(EC, "Reader::readArray failed"); + + // Look through type servers. If we've already seen this type server, don't + // merge any type information. 
+ if (Optional TS = maybeReadTypeServerRecord(Types)) + return maybeMergeTypeServerPDB(File, *TS); + + // This is a /Z7 object. Fill in the temporary, caller-provided + // ObjectIndexMap. if (auto Err = mergeTypeAndIdRecords(IDTable, TypeTable, - TypeIndexMap, &Handler, Types)) - fatal(Err, "codeview::mergeTypeStreams failed"); + ObjectIndexMap.TPIMap, Types)) + fatal(Err, "codeview::mergeTypeAndIdRecords failed"); + return ObjectIndexMap; +} + +static Expected> +tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) { + std::unique_ptr ThisSession; + if (auto EC = + pdb::loadDataForPDB(pdb::PDB_ReaderType::Native, TSPath, ThisSession)) + return std::move(EC); + + std::unique_ptr NS( + static_cast(ThisSession.release())); + pdb::PDBFile &File = NS->getPDBFile(); + auto ExpectedInfo = File.getPDBInfoStream(); + // All PDB Files should have an Info stream. + if (!ExpectedInfo) + return ExpectedInfo.takeError(); + + // Just because a file with a matching name was found and it was an actual + // PDB file doesn't mean it matches. For it to match the InfoStream's GUID + // must match the GUID specified in the TypeServer2 record. + if (ExpectedInfo->getGuid() != GuidFromObj) + return make_error( + pdb::generic_error_code::type_server_not_found, TSPath); + + return std::move(NS); +} + +const CVIndexMap &PDBLinker::maybeMergeTypeServerPDB(ObjectFile *File, + TypeServer2Record &TS) { + // First, check if we already loaded a PDB with this GUID. Return the type + // index mapping if we have it. + auto Insertion = TypeServerIndexMappings.insert({TS.getGuid(), CVIndexMap()}); + CVIndexMap &IndexMap = Insertion.first->second; + if (!Insertion.second) + return IndexMap; + + // Mark this map as a type server map. + IndexMap.IsTypeServerMap = true; + + // Check for a PDB at: + // 1. The given file path + // 2. 
Next to the object file or archive file + auto ExpectedSession = tryToLoadPDB(TS.getGuid(), TS.getName()); + if (!ExpectedSession) { + consumeError(ExpectedSession.takeError()); + StringRef LocalPath = + !File->ParentName.empty() ? File->ParentName : File->getName(); + SmallString<128> Path = sys::path::parent_path(LocalPath); + sys::path::append( + Path, sys::path::filename(TS.getName(), sys::path::Style::windows)); + ExpectedSession = tryToLoadPDB(TS.getGuid(), Path); + } + if (auto E = ExpectedSession.takeError()) + fatal(E, "Type server PDB was not found"); + + // Merge TPI first, because the IPI stream will reference type indices. + auto ExpectedTpi = (*ExpectedSession)->getPDBFile().getPDBTpiStream(); + if (auto E = ExpectedTpi.takeError()) + fatal(E, "Type server does not have TPI stream"); + if (auto Err = mergeTypeRecords(TypeTable, IndexMap.TPIMap, + ExpectedTpi->typeArray())) + fatal(Err, "codeview::mergeTypeRecords failed"); + + // Merge IPI. + auto ExpectedIpi = (*ExpectedSession)->getPDBFile().getPDBIpiStream(); + if (auto E = ExpectedIpi.takeError()) + fatal(E, "Type server does not have TPI stream"); + if (auto Err = mergeIdRecords(IDTable, IndexMap.TPIMap, IndexMap.IPIMap, + ExpectedIpi->typeArray())) + fatal(Err, "codeview::mergeIdRecords failed"); + + return IndexMap; } static bool remapTypeIndex(TypeIndex &TI, ArrayRef TypeIndexMap) { @@ -127,16 +292,22 @@ static bool remapTypeIndex(TypeIndex &TI, ArrayRef TypeIndexMap) { static void remapTypesInSymbolRecord(ObjectFile *File, MutableArrayRef Contents, - ArrayRef TypeIndexMap, + const CVIndexMap &IndexMap, ArrayRef TypeRefs) { for (const TiReference &Ref : TypeRefs) { unsigned ByteSize = Ref.Count * sizeof(TypeIndex); if (Contents.size() < Ref.Offset + ByteSize) fatal("symbol record too short"); + + // This can be an item index or a type index. Choose the appropriate map. 
+ ArrayRef TypeOrItemMap = IndexMap.TPIMap; + if (Ref.Kind == TiRefKind::IndexRef && IndexMap.IsTypeServerMap) + TypeOrItemMap = IndexMap.IPIMap; + MutableArrayRef TIs( reinterpret_cast(Contents.data() + Ref.Offset), Ref.Count); for (TypeIndex &TI : TIs) { - if (!remapTypeIndex(TI, TypeIndexMap)) { + if (!remapTypeIndex(TI, TypeOrItemMap)) { TI = TypeIndex(SimpleTypeKind::NotTranslated); log("ignoring symbol record in " + File->getName() + " with bad type index 0x" + utohexstr(TI.getIndex())); @@ -241,7 +412,7 @@ static void scopeStackClose(SmallVectorImpl &Stack, } static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjectFile *File, - ArrayRef TypeIndexMap, + const CVIndexMap &IndexMap, BinaryStreamRef SymData) { // FIXME: Improve error recovery by warning and skipping records when // possible. @@ -264,7 +435,7 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjectFile *File, // Re-map all the type index references. MutableArrayRef Contents = NewData.drop_front(sizeof(RecordPrefix)); - remapTypesInSymbolRecord(File, Contents, TypeIndexMap, TypeRefs); + remapTypesInSymbolRecord(File, Contents, IndexMap, TypeRefs); // Fill in "Parent" and "End" fields by maintaining a stack of scopes. CVSymbol NewSym(Sym.kind(), NewData); @@ -289,110 +460,105 @@ static ArrayRef relocateDebugChunk(BumpPtrAllocator &Alloc, ".debug$S"); } -// Add all object files to the PDB. Merge .debug$T sections into IpiData and -// TpiData. -static void addObjectsToPDB(BumpPtrAllocator &Alloc, SymbolTable *Symtab, - pdb::PDBFileBuilder &Builder, - TypeTableBuilder &TypeTable, - TypeTableBuilder &IDTable) { - // Follow type servers. If the same type server is encountered more than - // once for this instance of `PDBTypeServerHandler` (for example if many - // object files reference the same TypeServer), the types from the - // TypeServer will only be visited once. 
- pdb::PDBTypeServerHandler Handler; +void PDBLinker::addObjectFile(ObjectFile *File) { + // Add a module descriptor for every object file. We need to put an absolute + // path to the object into the PDB. If this is a plain object, we make its + // path absolute. If it's an object in an archive, we make the archive path + // absolute. + bool InArchive = !File->ParentName.empty(); + SmallString<128> Path = InArchive ? File->ParentName : File->getName(); + sys::fs::make_absolute(Path); + sys::path::native(Path, sys::path::Style::windows); + StringRef Name = InArchive ? File->getName() : StringRef(Path); - // PDBs use a single global string table for filenames in the file checksum - // table. - auto PDBStrTab = std::make_shared(); + File->ModuleDBI = &ExitOnErr(Builder.getDbiBuilder().addModuleInfo(Name)); + File->ModuleDBI->setObjFileName(Path); - // Visit all .debug$T sections to add them to Builder. - for (ObjectFile *File : Symtab->ObjectFiles) { - // Add a module descriptor for every object file. We need to put an absolute - // path to the object into the PDB. If this is a plain object, we make its - // path absolute. If it's an object in an archive, we make the archive path - // absolute. - bool InArchive = !File->ParentName.empty(); - SmallString<128> Path = InArchive ? File->ParentName : File->getName(); - sys::fs::make_absolute(Path); - sys::path::native(Path, llvm::sys::path::Style::windows); - StringRef Name = InArchive ? File->getName() : StringRef(Path); + // Before we can process symbol substreams from .debug$S, we need to process + // type information, file checksums, and the string table. Add type info to + // the PDB first, so that we can get the map from object file type and item + // indices to PDB type and item indices. 
+ CVIndexMap ObjectIndexMap; + const CVIndexMap &IndexMap = mergeDebugT(File, ObjectIndexMap); - File->ModuleDBI = &ExitOnErr(Builder.getDbiBuilder().addModuleInfo(Name)); - File->ModuleDBI->setObjFileName(Path); + // Now do all live .debug$S sections. + for (SectionChunk *DebugChunk : File->getDebugChunks()) { + if (!DebugChunk->isLive() || DebugChunk->getSectionName() != ".debug$S") + continue; - // Before we can process symbol substreams from .debug$S, we need to process - // type information, file checksums, and the string table. Add type info to - // the PDB first, so that we can get the map from object file type and item - // indices to PDB type and item indices. - SmallVector TypeIndexMap; - mergeDebugT(File, IDTable, TypeTable, TypeIndexMap, Handler); + ArrayRef RelocatedDebugContents = + relocateDebugChunk(Alloc, DebugChunk); + if (RelocatedDebugContents.empty()) + continue; - // Now do all line info. - for (SectionChunk *DebugChunk : File->getDebugChunks()) { - if (!DebugChunk->isLive() || DebugChunk->getSectionName() != ".debug$S") - continue; + DebugSubsectionArray Subsections; + BinaryStreamReader Reader(RelocatedDebugContents, support::little); + ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size())); - ArrayRef RelocatedDebugContents = - relocateDebugChunk(Alloc, DebugChunk); - if (RelocatedDebugContents.empty()) - continue; - - DebugSubsectionArray Subsections; - BinaryStreamReader Reader(RelocatedDebugContents, support::little); - ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size())); - - DebugStringTableSubsectionRef CVStrTab; - DebugChecksumsSubsectionRef Checksums; - for (const DebugSubsectionRecord &SS : Subsections) { - switch (SS.kind()) { - case DebugSubsectionKind::StringTable: - ExitOnErr(CVStrTab.initialize(SS.getRecordData())); - break; - case DebugSubsectionKind::FileChecksums: - ExitOnErr(Checksums.initialize(SS.getRecordData())); - break; - case DebugSubsectionKind::Lines: - // We can add the 
relocated line table directly to the PDB without - // modification because the file checksum offsets will stay the same. - File->ModuleDBI->addDebugSubsection(SS); - break; - case DebugSubsectionKind::Symbols: - mergeSymbolRecords(Alloc, File, TypeIndexMap, SS.getRecordData()); - break; - default: - // FIXME: Process the rest of the subsections. - break; - } - } - - if (Checksums.valid()) { - // Make a new file checksum table that refers to offsets in the PDB-wide - // string table. Generally the string table subsection appears after the - // checksum table, so we have to do this after looping over all the - // subsections. - if (!CVStrTab.valid()) - fatal(".debug$S sections must have both a string table subsection " - "and a checksum subsection table or neither"); - auto NewChecksums = - make_unique(*PDBStrTab); - for (FileChecksumEntry &FC : Checksums) { - StringRef FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); - ExitOnErr(Builder.getDbiBuilder().addModuleSourceFile( - *File->ModuleDBI, FileName)); - NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); - } - File->ModuleDBI->addDebugSubsection(std::move(NewChecksums)); + DebugStringTableSubsectionRef CVStrTab; + DebugChecksumsSubsectionRef Checksums; + for (const DebugSubsectionRecord &SS : Subsections) { + switch (SS.kind()) { + case DebugSubsectionKind::StringTable: + ExitOnErr(CVStrTab.initialize(SS.getRecordData())); + break; + case DebugSubsectionKind::FileChecksums: + ExitOnErr(Checksums.initialize(SS.getRecordData())); + break; + case DebugSubsectionKind::Lines: + // We can add the relocated line table directly to the PDB without + // modification because the file checksum offsets will stay the same. + File->ModuleDBI->addDebugSubsection(SS); + break; + case DebugSubsectionKind::Symbols: + mergeSymbolRecords(Alloc, File, IndexMap, SS.getRecordData()); + break; + default: + // FIXME: Process the rest of the subsections. 
+ break; } } - } - Builder.getStringTableBuilder().setStrings(*PDBStrTab); + if (Checksums.valid()) { + // Make a new file checksum table that refers to offsets in the PDB-wide + // string table. Generally the string table subsection appears after the + // checksum table, so we have to do this after looping over all the + // subsections. + if (!CVStrTab.valid()) + fatal(".debug$S sections must have both a string table subsection " + "and a checksum subsection table or neither"); + auto NewChecksums = make_unique(PDBStrTab); + for (FileChecksumEntry &FC : Checksums) { + StringRef FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); + ExitOnErr(Builder.getDbiBuilder().addModuleSourceFile(*File->ModuleDBI, + FileName)); + NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); + } + File->ModuleDBI->addDebugSubsection(std::move(NewChecksums)); + } + } +} + +// Add all object files to the PDB. Merge .debug$T sections into IpiData and +// TpiData. +void PDBLinker::addObjectsToPDB() { + for (ObjectFile *File : Symtab->ObjectFiles) + addObjectFile(File); + + Builder.getStringTableBuilder().setStrings(PDBStrTab); // Construct TPI stream contents. addTypeInfo(Builder.getTpiBuilder(), TypeTable); // Construct IPI stream contents. addTypeInfo(Builder.getIpiBuilder(), IDTable); + + // Add public and symbol records stream. + + // For now we don't actually write any thing useful to the publics stream, but + // the act of "getting" it also creates it lazily so that we write an empty + // stream. 
+ (void)Builder.getPublicsBuilder(); } static void addLinkerModuleSymbols(StringRef Path, @@ -423,7 +589,7 @@ static void addLinkerModuleSymbols(StringRef Path, std::string ArgStr = llvm::join(Args, " "); EBS.Fields.push_back("cwd"); SmallString<64> cwd; - llvm::sys::fs::current_path(cwd); + sys::fs::current_path(cwd); EBS.Fields.push_back(cwd); EBS.Fields.push_back("exe"); EBS.Fields.push_back(Config->Argv[0]); @@ -442,8 +608,14 @@ static void addLinkerModuleSymbols(StringRef Path, // Creates a PDB file. void coff::createPDB(SymbolTable *Symtab, ArrayRef SectionTable, const llvm::codeview::DebugInfo *DI) { - BumpPtrAllocator Alloc; - pdb::PDBFileBuilder Builder(Alloc); + PDBLinker PDB(Symtab); + PDB.initialize(DI); + PDB.addObjectsToPDB(); + PDB.addSections(SectionTable); + PDB.commit(); +} + +void PDBLinker::initialize(const llvm::codeview::DebugInfo *DI) { ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize // Create streams in MSF for predefined streams, namely @@ -455,12 +627,7 @@ void coff::createPDB(SymbolTable *Symtab, ArrayRef SectionTable, auto &InfoBuilder = Builder.getInfoBuilder(); InfoBuilder.setAge(DI ? DI->PDB70.Age : 0); - llvm::SmallString<128> NativePath(Config->PDBPath.begin(), - Config->PDBPath.end()); - llvm::sys::fs::make_absolute(NativePath); - llvm::sys::path::native(NativePath, llvm::sys::path::Style::windows); - - pdb::PDB_UniqueId uuid{}; + GUID uuid{}; if (DI) memcpy(&uuid, &DI->PDB70.Signature, sizeof(uuid)); InfoBuilder.setGuid(uuid); @@ -471,32 +638,25 @@ void coff::createPDB(SymbolTable *Symtab, ArrayRef SectionTable, pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); DbiBuilder.setVersionHeader(pdb::PdbDbiV70); ExitOnErr(DbiBuilder.addDbgStream(pdb::DbgHeaderType::NewFPO, {})); +} - // It's not entirely clear what this is, but the * Linker * module uses it. 
- uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath); - - TypeTableBuilder TypeTable(BAlloc); - TypeTableBuilder IDTable(BAlloc); - addObjectsToPDB(Alloc, Symtab, Builder, TypeTable, IDTable); - - // Add public and symbol records stream. - - // For now we don't actually write any thing useful to the publics stream, but - // the act of "getting" it also creates it lazily so that we write an empty - // stream. - (void)Builder.getPublicsBuilder(); - +void PDBLinker::addSections(ArrayRef SectionTable) { // Add Section Contributions. + pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); addSectionContribs(Symtab, DbiBuilder); // Add Section Map stream. ArrayRef Sections = { (const object::coff_section *)SectionTable.data(), SectionTable.size() / sizeof(object::coff_section)}; - std::vector SectionMap = - pdb::DbiStreamBuilder::createSectionMap(Sections); + SectionMap = pdb::DbiStreamBuilder::createSectionMap(Sections); DbiBuilder.setSectionMap(SectionMap); + // It's not entirely clear what this is, but the * Linker * module uses it. + NativePath = Config->PDBPath; + sys::fs::make_absolute(NativePath); + sys::path::native(NativePath, sys::path::Style::windows); + uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath); auto &LinkerModule = ExitOnErr(DbiBuilder.addModuleInfo("* Linker *")); LinkerModule.setPdbFilePathNI(PdbFilePathNI); addLinkerModuleSymbols(NativePath, LinkerModule, Alloc); @@ -504,7 +664,9 @@ void coff::createPDB(SymbolTable *Symtab, ArrayRef SectionTable, // Add COFF section header stream. ExitOnErr( DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); +} +void PDBLinker::commit() { // Write to a file. 
ExitOnErr(Builder.commit(Config->PDBPath)); } diff --git a/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp b/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp index e4b06ade4487..106021de7d32 100644 --- a/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp +++ b/contrib/llvm/tools/lld/ELF/Arch/ARM.cpp @@ -40,6 +40,8 @@ class ARM final : public TargetInfo { void addPltHeaderSymbols(InputSectionBase *ISD) const override; bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, const SymbolBody &S) const override; + bool inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; }; } // namespace @@ -218,6 +220,49 @@ bool ARM::needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, return false; } +bool ARM::inBranchRange(uint32_t RelocType, uint64_t Src, uint64_t Dst) const { + uint64_t Range; + uint64_t InstrSize; + + switch (RelocType) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + case R_ARM_CALL: + Range = 0x2000000; + InstrSize = 4; + break; + case R_ARM_THM_JUMP19: + Range = 0x100000; + InstrSize = 2; + break; + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + Range = 0x1000000; + InstrSize = 2; + break; + default: + return true; + } + // PC at Src is 2 instructions ahead, immediate of branch is signed + if (Src > Dst) + Range -= 2 * InstrSize; + else + Range += InstrSize; + + if ((Dst & 0x1) == 0) + // Destination is ARM, if ARM caller then Src is already 4-byte aligned. + // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure + // destination will be 4 byte aligned. + Src &= ~0x3; + else + // Bit 0 == 1 denotes Thumb state, it is not part of the range + Dst &= ~0x1; + + uint64_t Distance = (Src > Dst) ? 
Src - Dst : Dst - Src; + return Distance <= Range; +} + void ARM::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { case R_ARM_ABS32: diff --git a/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp b/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp index ed183e9a3061..3d1dc1daf0c1 100644 --- a/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp +++ b/contrib/llvm/tools/lld/ELF/Arch/MipsArchTree.cpp @@ -37,7 +37,7 @@ struct FileFlags { StringRef Filename; uint32_t Flags; }; -} +} // namespace static StringRef getAbiName(uint32_t Flags) { switch (Flags) { @@ -337,8 +337,8 @@ uint8_t elf::getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, return NewFlag; if (compareMipsFpAbi(OldFlag, NewFlag) < 0) error("target floating point ABI '" + getMipsFpAbiName(OldFlag) + - "' is incompatible with '" + getMipsFpAbiName(NewFlag) + "': " + - FileName); + "' is incompatible with '" + getMipsFpAbiName(NewFlag) + + "': " + FileName); return OldFlag; } diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h index 5e3b77637316..23627dd812db 100644 --- a/contrib/llvm/tools/lld/ELF/Config.h +++ b/contrib/llvm/tools/lld/ELF/Config.h @@ -99,6 +99,7 @@ struct Configuration { std::vector VersionDefinitions; std::vector Argv; std::vector AuxiliaryList; + std::vector FilterList; std::vector SearchPaths; std::vector SymbolOrderingFile; std::vector Undefined; diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp index 10ad13f214d5..4630e110bcd8 100644 --- a/contrib/llvm/tools/lld/ELF/Driver.cpp +++ b/contrib/llvm/tools/lld/ELF/Driver.cpp @@ -259,6 +259,9 @@ static void checkOptions(opt::InputArgList &Args) { if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); + if (!Config->Shared && !Config->FilterList.empty()) + error("-F may not be used without -shared"); + if (!Config->Shared && !Config->AuxiliaryList.empty()) error("-f may not be used without -shared"); @@ -631,6 
+634,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { getArg(Args, OPT_export_dynamic, OPT_no_export_dynamic, false); Config->FatalWarnings = getArg(Args, OPT_fatal_warnings, OPT_no_fatal_warnings, false); + Config->FilterList = getArgs(Args, OPT_filter); Config->Fini = Args.getLastArgValue(OPT_fini, "_fini"); Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false); Config->GdbIndex = Args.hasArg(OPT_gdb_index); diff --git a/contrib/llvm/tools/lld/ELF/EhFrame.h b/contrib/llvm/tools/lld/ELF/EhFrame.h index 4e2b6f83a294..07d1aaa3cbb3 100644 --- a/contrib/llvm/tools/lld/ELF/EhFrame.h +++ b/contrib/llvm/tools/lld/ELF/EhFrame.h @@ -19,7 +19,7 @@ struct EhSectionPiece; template size_t readEhRecordSize(InputSectionBase *S, size_t Off); template uint8_t getFdeEncoding(EhSectionPiece *P); -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/Filesystem.h b/contrib/llvm/tools/lld/ELF/Filesystem.h index d56d067f7378..dbeadac5a96b 100644 --- a/contrib/llvm/tools/lld/ELF/Filesystem.h +++ b/contrib/llvm/tools/lld/ELF/Filesystem.h @@ -16,7 +16,7 @@ namespace lld { namespace elf { void unlinkAsync(StringRef Path); std::error_code tryCreateFile(StringRef Path); -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/GdbIndex.h b/contrib/llvm/tools/lld/ELF/GdbIndex.h index c49f8946e199..bc024e6689ef 100644 --- a/contrib/llvm/tools/lld/ELF/GdbIndex.h +++ b/contrib/llvm/tools/lld/ELF/GdbIndex.h @@ -11,8 +11,8 @@ #define LLD_ELF_GDB_INDEX_H #include "InputFiles.h" -#include "llvm/Object/ELF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Object/ELF.h" namespace lld { namespace elf { diff --git a/contrib/llvm/tools/lld/ELF/ICF.h b/contrib/llvm/tools/lld/ELF/ICF.h index 502e128c8109..24219855fc17 100644 --- a/contrib/llvm/tools/lld/ELF/ICF.h +++ b/contrib/llvm/tools/lld/ELF/ICF.h @@ -14,6 +14,6 @@ namespace lld { namespace elf { template void doIcf(); } -} +} 
// namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.h b/contrib/llvm/tools/lld/ELF/InputFiles.h index 544a0b009b39..f6d3f907850c 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.h +++ b/contrib/llvm/tools/lld/ELF/InputFiles.h @@ -11,8 +11,8 @@ #define LLD_ELF_INPUT_FILES_H #include "Config.h" -#include "InputSection.h" #include "Error.h" +#include "InputSection.h" #include "Symbols.h" #include "lld/Core/LLVM.h" @@ -34,7 +34,7 @@ struct DILineInfo; namespace lto { class InputFile; } -} +} // namespace llvm namespace lld { namespace elf { diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h index 28afa0e83add..d19923c90a99 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.h +++ b/contrib/llvm/tools/lld/ELF/LTO.h @@ -30,7 +30,7 @@ namespace llvm { namespace lto { class LTO; } -} +} // namespace llvm namespace lld { namespace elf { @@ -51,7 +51,7 @@ class BitcodeCompiler { std::vector> Buff; std::vector> Files; }; -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp index a182d5a3a096..8bdbd8db20ad 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -229,6 +229,19 @@ bool LinkerScript::shouldKeep(InputSectionBase *S) { return false; } +// If an input string is in the form of "foo.N" where N is a number, +// return N. Otherwise, returns 65536, which is one greater than the +// lowest priority. +static int getPriority(StringRef S) { + size_t Pos = S.rfind('.'); + if (Pos == StringRef::npos) + return 65536; + int V; + if (!to_integer(S.substr(Pos + 1), V, 10)) + return 65536; + return V; +} + // A helper function for the SORT() command. static std::function getComparator(SortSectionPolicy K) { @@ -449,7 +462,7 @@ void LinkerScript::fabricateDefaultCommands() { // The Sections with -T
have been sorted in order of ascending // address. We must lower StartAddr if the lowest -T
as // calls to setDot() must be monotonically increasing. - for (auto& KV : Config->SectionStartMap) + for (auto &KV : Config->SectionStartMap) StartAddr = std::min(StartAddr, KV.second); Commands.push_back(make( @@ -739,7 +752,7 @@ void LinkerScript::adjustSectionsAfterSorting() { Cmd->MemRegion = findMemoryRegion(Cmd); // Handle align (e.g. ".foo : ALIGN(16) { ... }"). if (Cmd->AlignExpr) - Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue()); + Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue()); } } @@ -1071,7 +1084,7 @@ template void OutputSectionCommand::finalize() { } if ((Sec->Flags & SHF_LINK_ORDER)) { - std::sort(Sections.begin(), Sections.end(), compareByFilePosition); + std::stable_sort(Sections.begin(), Sections.end(), compareByFilePosition); for (int I = 0, N = Sections.size(); I < N; ++I) *ScriptSections[I] = Sections[I]; diff --git a/contrib/llvm/tools/lld/ELF/MapFile.cpp b/contrib/llvm/tools/lld/ELF/MapFile.cpp index e0c7d8cd8b1b..2b2a95c47cf9 100644 --- a/contrib/llvm/tools/lld/ELF/MapFile.cpp +++ b/contrib/llvm/tools/lld/ELF/MapFile.cpp @@ -55,7 +55,7 @@ template std::vector getSymbols() { for (SymbolBody *B : File->getSymbols()) if (B->File == File && !B->isSection()) if (auto *Sym = dyn_cast(B)) - if (Sym->Section) + if (Sym->Section && Sym->Section->Live) V.push_back(Sym); return V; } diff --git a/contrib/llvm/tools/lld/ELF/MapFile.h b/contrib/llvm/tools/lld/ELF/MapFile.h index 68d8ba8d4a04..460848ff24d3 100644 --- a/contrib/llvm/tools/lld/ELF/MapFile.h +++ b/contrib/llvm/tools/lld/ELF/MapFile.h @@ -17,7 +17,7 @@ namespace elf { struct OutputSectionCommand; template void writeMapFile(llvm::ArrayRef Script); -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/Memory.h b/contrib/llvm/tools/lld/ELF/Memory.h index e5a04ed1e5a8..4000f2f9f1c9 100644 --- a/contrib/llvm/tools/lld/ELF/Memory.h +++ b/contrib/llvm/tools/lld/ELF/Memory.h @@ -61,7 +61,7 @@ inline void freeArena() { Alloc->reset(); 
BAlloc.Reset(); } -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td index 29e14c530c6a..9c78608118cc 100644 --- a/contrib/llvm/tools/lld/ELF/Options.td +++ b/contrib/llvm/tools/lld/ELF/Options.td @@ -104,6 +104,8 @@ def export_dynamic_symbol: S<"export-dynamic-symbol">, def fatal_warnings: F<"fatal-warnings">, HelpText<"Treat warnings as errors">; +def filter: J<"filter=">, HelpText<"Set DT_FILTER field to the specified name">; + def fini: S<"fini">, MetaVarName<"">, HelpText<"Specify a finalizer function">; @@ -305,6 +307,7 @@ def alias_exclude_libs: J<"exclude-libs=">, Alias; def alias_export_dynamic_E: Flag<["-"], "E">, Alias; def alias_export_dynamic_symbol: J<"export-dynamic-symbol=">, Alias; +def alias_filter: Separate<["-"], "F">, Alias; def alias_fini_fini: J<"fini=">, Alias; def alias_format_b: S<"b">, Alias; def alias_hash_style_hash_style: J<"hash-style=">, Alias; @@ -339,6 +342,7 @@ def alias_Ttext_segment: S<"Ttext-segment">, Alias; def alias_Ttext_segment_eq: J<"Ttext-segment=">, Alias; def alias_undefined_eq: J<"undefined=">, Alias; def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias; +def alias_version_script_eq: J<"version-script=">, Alias; def alias_version_V: Flag<["-"], "V">, Alias; def alias_wrap_wrap: J<"wrap=">, Alias; @@ -406,6 +410,3 @@ def EL : F<"EL">; def G: JoinedOrSeparate<["-"], "G">; def Qy : F<"Qy">; -// Aliases for ignored options -def alias_version_script_version_script: J<"version-script=">, - Alias; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp index d6ae5dcae167..abe548165866 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.cpp +++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp @@ -222,16 +222,16 @@ void OutputSectionFactory::addInputSec(InputSectionBase *IS, if (Sec) { if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(IS->Flags)) - 
error("incompatible section flags for " + Sec->Name + - "\n>>> " + toString(IS) + ": 0x" + utohexstr(IS->Flags) + + error("incompatible section flags for " + Sec->Name + "\n>>> " + + toString(IS) + ": 0x" + utohexstr(IS->Flags) + "\n>>> output section " + Sec->Name + ": 0x" + utohexstr(Sec->Flags)); if (Sec->Type != IS->Type) { if (canMergeToProgbits(Sec->Type) && canMergeToProgbits(IS->Type)) Sec->Type = SHT_PROGBITS; else - error("section type mismatch for " + IS->Name + - "\n>>> " + toString(IS) + ": " + + error("section type mismatch for " + IS->Name + "\n>>> " + + toString(IS) + ": " + getELFSectionTypeName(Config->EMachine, IS->Type) + "\n>>> output section " + Sec->Name + ": " + getELFSectionTypeName(Config->EMachine, Sec->Type)); diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h index 68ee066a13da..68b46ebf6a7b 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.h +++ b/contrib/llvm/tools/lld/ELF/OutputSections.h @@ -111,8 +111,8 @@ struct SectionKey { uint64_t Flags; uint32_t Alignment; }; -} -} +} // namespace elf +} // namespace lld namespace llvm { template <> struct DenseMapInfo { static lld::elf::SectionKey getEmptyKey(); @@ -121,7 +121,7 @@ template <> struct DenseMapInfo { static bool isEqual(const lld::elf::SectionKey &LHS, const lld::elf::SectionKey &RHS); }; -} +} // namespace llvm namespace lld { namespace elf { @@ -150,5 +150,4 @@ extern std::vector OutputSectionCommands; } // namespace elf } // namespace lld - #endif diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp index 52dbe4b583d0..e5fcb2dcc582 100644 --- a/contrib/llvm/tools/lld/ELF/Relocations.cpp +++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp @@ -276,7 +276,7 @@ handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C, } else { C.Relocations.push_back( {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, - Offset, Addend, &Body}); + Offset, Addend, 
&Body}); } return Target->TlsGdRelaxSkip; } @@ -1000,16 +1000,20 @@ void ThunkCreator::mergeThunks() { } } -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, +static uint32_t findEndOfFirstNonExec(OutputSectionCommand &Cmd) { + for (BaseCommand *Base : Cmd.Commands) + if (auto *ISD = dyn_cast(Base)) + for (auto *IS : ISD->Sections) + if ((IS->Flags & SHF_EXECINSTR) == 0) + return IS->OutSecOff + IS->getSize(); + return 0; +} + +ThunkSection *ThunkCreator::getOSThunkSec(OutputSectionCommand *Cmd, std::vector *ISR) { if (CurTS == nullptr) { - uint32_t Off = 0; - for (auto *IS : OS->Sections) { - Off = IS->OutSecOff + IS->getSize(); - if ((IS->Flags & SHF_EXECINSTR) == 0) - break; - } - CurTS = addThunkSection(OS, ISR, Off); + uint32_t Off = findEndOfFirstNonExec(*Cmd); + CurTS = addThunkSection(Cmd->Sec, ISR, Off); } return CurTS; } @@ -1024,7 +1028,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) { OutputSectionCommand *C = Script->getCmd(TOS); std::vector *Range = nullptr; for (BaseCommand *BC : C->Commands) - if (auto *ISD = dyn_cast (BC)) { + if (auto *ISD = dyn_cast(BC)) { InputSection *first = ISD->Sections.front(); InputSection *last = ISD->Sections.back(); if (IS->OutSecOff >= first->OutSecOff && @@ -1046,7 +1050,6 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS, return TS; } - std::pair ThunkCreator::getThunk(SymbolBody &Body, uint32_t Type) { auto Res = ThunkedSymbols.insert({&Body, std::vector()}); @@ -1066,7 +1069,7 @@ std::pair ThunkCreator::getThunk(SymbolBody &Body, // InputSectionDescription::Sections. 
void ThunkCreator::forEachExecInputSection( ArrayRef OutputSections, - std::function *, + std::function *, InputSection *)> Fn) { for (OutputSectionCommand *Cmd : OutputSections) { @@ -1077,7 +1080,7 @@ void ThunkCreator::forEachExecInputSection( if (auto *ISD = dyn_cast(BC)) { CurTS = nullptr; for (InputSection *IS : ISD->Sections) - Fn(OS, &ISD->Sections, IS); + Fn(Cmd, &ISD->Sections, IS); } } } @@ -1103,32 +1106,32 @@ bool ThunkCreator::createThunks( // We separate the creation of ThunkSections from the insertion of the // ThunkSections back into the OutputSection as ThunkSections are not always // inserted into the same OutputSection as the caller. - forEachExecInputSection( - OutputSections, [&](OutputSection *OS, std::vector *ISR, - InputSection *IS) { - for (Relocation &Rel : IS->Relocations) { - SymbolBody &Body = *Rel.Sym; - if (Thunks.find(&Body) != Thunks.end() || - !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) - continue; - Thunk *T; - bool IsNew; - std::tie(T, IsNew) = getThunk(Body, Rel.Type); - if (IsNew) { - // Find or create a ThunkSection for the new Thunk - ThunkSection *TS; - if (auto *TIS = T->getTargetInputSection()) - TS = getISThunkSec(TIS, OS); - else - TS = getOSThunkSec(OS, ISR); - TS->addThunk(T); - Thunks[T->ThunkSym] = T; - } - // Redirect relocation to Thunk, we never go via the PLT to a Thunk - Rel.Sym = T->ThunkSym; - Rel.Expr = fromPlt(Rel.Expr); - } - }); + forEachExecInputSection(OutputSections, [&](OutputSectionCommand *Cmd, + std::vector *ISR, + InputSection *IS) { + for (Relocation &Rel : IS->Relocations) { + SymbolBody &Body = *Rel.Sym; + if (Thunks.find(&Body) != Thunks.end() || + !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) + continue; + Thunk *T; + bool IsNew; + std::tie(T, IsNew) = getThunk(Body, Rel.Type); + if (IsNew) { + // Find or create a ThunkSection for the new Thunk + ThunkSection *TS; + if (auto *TIS = T->getTargetInputSection()) + TS = getISThunkSec(TIS, Cmd->Sec); + else + TS = 
getOSThunkSec(Cmd, ISR); + TS->addThunk(T); + Thunks[T->ThunkSym] = T; + } + // Redirect relocation to Thunk, we never go via the PLT to a Thunk + Rel.Sym = T->ThunkSym; + Rel.Expr = fromPlt(Rel.Expr); + } + }); // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(); ++Pass; diff --git a/contrib/llvm/tools/lld/ELF/Relocations.h b/contrib/llvm/tools/lld/ELF/Relocations.h index fc3e3444ac24..ea046d248474 100644 --- a/contrib/llvm/tools/lld/ELF/Relocations.h +++ b/contrib/llvm/tools/lld/ELF/Relocations.h @@ -103,7 +103,8 @@ struct RelExprMaskBuilder { // RelExpr's as a constant bit mask and test for membership with a // couple cheap bitwise operations. template bool isRelExprOneOf(RelExpr Expr) { - assert(0 <= Expr && (int)Expr < 64 && "RelExpr is too large for 64-bit mask!"); + assert(0 <= Expr && (int)Expr < 64 && + "RelExpr is too large for 64-bit mask!"); return (uint64_t(1) << Expr) & RelExprMaskBuilder::build(); } @@ -133,12 +134,12 @@ class ThunkCreator { private: void mergeThunks(); - ThunkSection *getOSThunkSec(OutputSection *OS, + ThunkSection *getOSThunkSec(OutputSectionCommand *Cmd, std::vector *ISR); ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); void forEachExecInputSection( ArrayRef OutputSections, - std::function *, + std::function *, InputSection *)> Fn); std::pair getThunk(SymbolBody &Body, uint32_t Type); @@ -178,7 +179,7 @@ template static inline int64_t getAddend(const typename ELFT::Rela &Rel) { return Rel.r_addend; } -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp index 72940ca0cfd4..b3847081697c 100644 --- a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp +++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp @@ -113,6 +113,12 @@ class ScriptParser final : ScriptLexer { }; } // namespace +static StringRef unquote(StringRef S) { + if (S.startswith("\"")) + return S.substr(1, S.size() - 2); + return S; 
+} + static bool isUnderSysroot(StringRef Path) { if (Config->Sysroot == "") return false; @@ -1103,6 +1109,10 @@ void ScriptParser::readVersionDeclaration(StringRef VerStr) { expect(";"); } +static bool hasWildcard(StringRef S) { + return S.find_first_of("?*[") != StringRef::npos; +} + // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". std::pair, std::vector> ScriptParser::readSymbols() { diff --git a/contrib/llvm/tools/lld/ELF/Strings.cpp b/contrib/llvm/tools/lld/ELF/Strings.cpp index 2e88bfba0fc1..bca86384002d 100644 --- a/contrib/llvm/tools/lld/ELF/Strings.cpp +++ b/contrib/llvm/tools/lld/ELF/Strings.cpp @@ -38,29 +38,6 @@ bool StringMatcher::match(StringRef S) const { return false; } -// If an input string is in the form of "foo.N" where N is a number, -// return N. Otherwise, returns 65536, which is one greater than the -// lowest priority. -int elf::getPriority(StringRef S) { - size_t Pos = S.rfind('.'); - if (Pos == StringRef::npos) - return 65536; - int V; - if (!to_integer(S.substr(Pos + 1), V, 10)) - return 65536; - return V; -} - -bool elf::hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; -} - -StringRef elf::unquote(StringRef S) { - if (!S.startswith("\"")) - return S; - return S.substr(1, S.size() - 2); -} - // Converts a hex string (e.g. "deadbeef") to a vector. std::vector elf::parseHex(StringRef S) { std::vector Hex; diff --git a/contrib/llvm/tools/lld/ELF/Strings.h b/contrib/llvm/tools/lld/ELF/Strings.h index fd1aa40539d2..68ccafa2ff17 100644 --- a/contrib/llvm/tools/lld/ELF/Strings.h +++ b/contrib/llvm/tools/lld/ELF/Strings.h @@ -21,11 +21,8 @@ namespace lld { namespace elf { -int getPriority(StringRef S); -bool hasWildcard(StringRef S); std::vector parseHex(StringRef S); bool isValidCIdentifier(StringRef S); -StringRef unquote(StringRef S); // This is a lazy version of StringRef. String size is computed lazily // when it is needed. 
It is more efficient than StringRef to instantiate @@ -76,7 +73,7 @@ llvm::Optional demangle(StringRef Name); inline ArrayRef toArrayRef(StringRef S) { return {(const uint8_t *)S.data(), S.size()}; } -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp index c802d74b8ff8..83091057ebed 100644 --- a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -172,8 +172,8 @@ template void SymbolTable::addSymbolWrap(StringRef Name) { } // Creates alias for symbol. Used to implement --defsym=ALIAS=SYM. -template void SymbolTable::addSymbolAlias(StringRef Alias, - StringRef Name) { +template +void SymbolTable::addSymbolAlias(StringRef Alias, StringRef Name) { SymbolBody *B = find(Name); if (!B) { error("-defsym: undefined symbol: " + Name); @@ -211,13 +211,6 @@ static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) { // Find an existing symbol or create and insert a new one. template std::pair SymbolTable::insert(StringRef Name) { - // @@ means the symbol is the default version. In that - // case symbol must exist and @@ will be used to - // resolve references to . - size_t Pos = Name.find("@@"); - if (Pos != StringRef::npos) - Name = Name.take_front(Pos); - auto P = Symtab.insert( {CachedHashStringRef(Name), SymIndex((int)SymVector.size(), false)}); SymIndex &V = P.first->second; @@ -400,9 +393,8 @@ static void warnOrError(const Twine &Msg) { } static void reportDuplicate(SymbolBody *Sym, InputFile *NewFile) { - warnOrError("duplicate symbol: " + toString(*Sym) + - "\n>>> defined in " + toString(Sym->File) + - "\n>>> defined in " + toString(NewFile)); + warnOrError("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " + + toString(Sym->File) + "\n>>> defined in " + toString(NewFile)); } template @@ -680,7 +672,8 @@ template void SymbolTable::handleAnonymousVersion() { // Set symbol versions to symbols. 
This function handles patterns // containing no wildcard characters. template -void SymbolTable::assignExactVersion(SymbolVersion Ver, uint16_t VersionId, +void SymbolTable::assignExactVersion(SymbolVersion Ver, + uint16_t VersionId, StringRef VersionName) { if (Ver.HasWildcard) return; @@ -724,13 +717,35 @@ void SymbolTable::assignWildcardVersion(SymbolVersion Ver, B->symbol()->VersionId = VersionId; } +static bool isDefaultVersion(SymbolBody *B) { + return B->isInCurrentDSO() && B->getName().find("@@") != StringRef::npos; +} + // This function processes version scripts by updating VersionId // member of symbols. template void SymbolTable::scanVersionScript() { + // Symbol themselves might know their versions because symbols + // can contain versions in the form of @. + // Let them parse and update their names to exclude version suffix. + for (Symbol *Sym : SymVector) { + SymbolBody *Body = Sym->body(); + bool IsDefault = isDefaultVersion(Body); + Body->parseSymbolVersion(); + + if (!IsDefault) + continue; + + // @@ means the symbol is the default version. If that's the + // case, the symbol is not used only to resolve of version + // but also undefined unversioned symbols with name . + SymbolBody *S = find(Body->getName()); + if (S && S->isUndefined()) + S->copy(Body); + } + // Handle edge cases first. handleAnonymousVersion(); - // Now we have version definitions, so we need to set version ids to symbols. // Each version definition has a glob pattern, and all symbols that match // with the pattern get that version. 
diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp index 1d17f57f0c30..c69007e781a6 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.cpp +++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp @@ -94,7 +94,7 @@ static uint64_t getSymVA(const SymbolBody &Body, int64_t &Addend) { if (D.isTls() && !Config->Relocatable) { if (!Out::TlsPhdr) fatal(toString(D.File) + - " has a STT_TLS symbol but doesn't have a PT_TLS section"); + " has an STT_TLS symbol but doesn't have an SHF_TLS section"); return VA - Out::TlsPhdr->p_vaddr; } return VA; diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp index fd724fac327c..4bbec4ab34bd 100644 --- a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp @@ -662,7 +662,12 @@ bool GotSection::empty() const { return NumEntries == 0 && !HasGotOffRel; } -void GotSection::writeTo(uint8_t *Buf) { relocateAlloc(Buf, Buf + Size); } +void GotSection::writeTo(uint8_t *Buf) { + // Buf points to the start of this section's buffer, + // whereas InputSectionBase::relocateAlloc() expects its argument + // to point to the start of the output section. 
+ relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + Size); +} MipsGotSection::MipsGotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16, @@ -812,9 +817,7 @@ unsigned MipsGotSection::getLocalEntriesNum() const { LocalEntries32.size(); } -void MipsGotSection::finalizeContents() { - updateAllocSize(); -} +void MipsGotSection::finalizeContents() { updateAllocSize(); } void MipsGotSection::updateAllocSize() { PageEntriesNum = 0; @@ -838,9 +841,7 @@ bool MipsGotSection::empty() const { return Config->Relocatable; } -uint64_t MipsGotSection::getGp() const { - return ElfSym::MipsGp->getVA(0); -} +uint64_t MipsGotSection::getGp() const { return ElfSym::MipsGp->getVA(0); } static uint64_t readUint(uint8_t *Buf) { if (Config->Is64) @@ -1019,6 +1020,8 @@ DynamicSection::DynamicSection() template void DynamicSection::addEntries() { // Add strings to .dynstr early so that .dynstr's size will be // fixed early. + for (StringRef S : Config->FilterList) + add({DT_FILTER, InX::DynStrTab->addString(S)}); for (StringRef S : Config->AuxiliaryList) add({DT_AUXILIARY, InX::DynStrTab->addString(S)}); if (!Config->Rpath.empty()) @@ -1607,7 +1610,7 @@ HashTableSection::HashTableSection() template void HashTableSection::finalizeContents() { getParent()->Link = InX::DynSymTab->getParent()->SectionIndex; - unsigned NumEntries = 2; // nbucket and nchain. + unsigned NumEntries = 2; // nbucket and nchain. NumEntries += InX::DynSymTab->getNumSymbols(); // The chain entries. // Create as many buckets as there are symbols. 
@@ -1926,9 +1929,7 @@ void GdbIndexSection::writeTo(uint8_t *Buf) { StringPool.write(Buf); } -bool GdbIndexSection::empty() const { - return !Out::DebugInfo; -} +bool GdbIndexSection::empty() const { return !Out::DebugInfo; } template EhFrameHeader::EhFrameHeader() @@ -2211,9 +2212,7 @@ void MergeSyntheticSection::finalizeContents() { finalizeNoTailMerge(); } -size_t MergeSyntheticSection::getSize() const { - return Builder.getSize(); -} +size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); } // This function decompresses compressed sections and scans over the input // sections to create mergeable synthetic sections. It removes @@ -2312,7 +2311,7 @@ ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off) } void ThunkSection::addThunk(Thunk *T) { - uint64_t Off = alignTo(Size, T->alignment); + uint64_t Off = alignTo(Size, T->Alignment); T->Offset = Off; Thunks.push_back(T); T->addSymbols(*this); diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp index c886419971bc..11986efc746f 100644 --- a/contrib/llvm/tools/lld/ELF/Target.cpp +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -128,6 +128,11 @@ bool TargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, return false; } +bool TargetInfo::inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const { + return true; +} + void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { writeGotPlt(Buf, S); } diff --git a/contrib/llvm/tools/lld/ELF/Target.h b/contrib/llvm/tools/lld/ELF/Target.h index 5914d9bbb7ef..1658a81c9b71 100644 --- a/contrib/llvm/tools/lld/ELF/Target.h +++ b/contrib/llvm/tools/lld/ELF/Target.h @@ -51,6 +51,9 @@ class TargetInfo { // targeting S. 
virtual bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, const SymbolBody &S) const; + // Return true if we can reach Dst from Src with Relocation RelocType + virtual bool inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const; virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, const uint8_t *Loc) const = 0; virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; @@ -154,6 +157,6 @@ static void checkAlignment(uint8_t *Loc, uint64_t V, uint32_t Type) { lld::toString(Type)); } } // namespace elf -} +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/Threads.h b/contrib/llvm/tools/lld/ELF/Threads.h index e01afd4d3fc9..9feb8683976c 100644 --- a/contrib/llvm/tools/lld/ELF/Threads.h +++ b/contrib/llvm/tools/lld/ELF/Threads.h @@ -82,7 +82,7 @@ inline void parallelForEachN(size_t Begin, size_t End, else for_each_n(llvm::parallel::seq, Begin, End, Fn); } -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp index cae31027e557..07289d0efdf1 100644 --- a/contrib/llvm/tools/lld/ELF/Thunks.cpp +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -72,9 +72,7 @@ class ARMV7PILongThunk final : public Thunk { class ThumbV7ABSLongThunk final : public Thunk { public: - ThumbV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) { - alignment = 2; - } + ThumbV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } uint32_t size() const override { return 10; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; @@ -84,9 +82,7 @@ class ThumbV7ABSLongThunk final : public Thunk { class ThumbV7PILongThunk final : public Thunk { public: - ThumbV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) { - alignment = 2; - } + ThumbV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) { Alignment = 2; } uint32_t size() const override { return 12; } void writeTo(uint8_t *Buf, ThunkSection &IS) const override; 
@@ -218,10 +214,10 @@ bool ThumbV7PILongThunk::isCompatibleWith(uint32_t RelocType) const { // Write MIPS LA25 thunk code to call PIC function from the non-PIC one. void MipsThunk::writeTo(uint8_t *Buf, ThunkSection &) const { uint64_t S = Destination.getVA(); - write32(Buf, 0x3c190000, Config->Endianness); // lui $25, %hi(func) + write32(Buf, 0x3c190000, Config->Endianness); // lui $25, %hi(func) write32(Buf + 4, 0x08000000 | (S >> 2), Config->Endianness); // j func - write32(Buf + 8, 0x27390000, Config->Endianness); // addiu $25, $25, %lo(func) - write32(Buf + 12, 0x00000000, Config->Endianness); // nop + write32(Buf + 8, 0x27390000, Config->Endianness); // addiu $25, $25, %lo(func) + write32(Buf + 12, 0x00000000, Config->Endianness); // nop Target->relocateOne(Buf, R_MIPS_HI16, S); Target->relocateOne(Buf + 8, R_MIPS_LO16, S); } @@ -262,9 +258,7 @@ static Thunk *addThunkArm(uint32_t Reloc, SymbolBody &S) { fatal("unrecognized relocation type"); } -static Thunk *addThunkMips(SymbolBody &S) { - return make(S); -} +static Thunk *addThunkMips(SymbolBody &S) { return make(S); } Thunk *addThunk(uint32_t RelocType, SymbolBody &S) { if (Config->EMachine == EM_ARM) diff --git a/contrib/llvm/tools/lld/ELF/Thunks.h b/contrib/llvm/tools/lld/ELF/Thunks.h index 00b6b2cf2994..21eba699fe4f 100644 --- a/contrib/llvm/tools/lld/ELF/Thunks.h +++ b/contrib/llvm/tools/lld/ELF/Thunks.h @@ -50,7 +50,7 @@ class Thunk { const SymbolBody &Destination; SymbolBody *ThunkSym; uint64_t Offset; - uint32_t alignment = 4; + uint32_t Alignment = 4; }; // For a Relocation to symbol S create a Thunk to be added to a synthetic diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp index bf43ee5c5f91..1853f99bc600 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.cpp +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -257,7 +257,6 @@ template void Writer::run() { if (ErrorCount) return; - // Handle -Map option. 
writeMapFile(OutputSectionCommands); if (ErrorCount) @@ -1331,7 +1330,7 @@ template void Writer::addPredefinedSections() { // ARM ABI requires .ARM.exidx to be terminated by some piece of data. // We have the terminater synthetic section class. Add that at the end. OutputSectionCommand *Cmd = findSectionCommand(".ARM.exidx"); - if (!Cmd || Cmd->Commands.empty() || Config->Relocatable) + if (!Cmd || !Cmd->Sec || Config->Relocatable) return; auto *Sentinel = make(); @@ -1392,7 +1391,8 @@ OutputSectionCommand *Writer::findSectionCommand(StringRef Name) { return nullptr; } -template OutputSection *Writer::findSectionInScript(StringRef Name) { +template +OutputSection *Writer::findSectionInScript(StringRef Name) { if (OutputSectionCommand *Cmd = findSectionCommand(Name)) return Cmd->Sec; return nullptr; diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h index e935b6419de6..7fa56bea1c35 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.h +++ b/contrib/llvm/tools/lld/ELF/Writer.h @@ -55,7 +55,7 @@ uint8_t getMipsFpAbiFlag(uint8_t OldFlag, uint8_t NewFlag, llvm::StringRef FileName); bool isMipsN32Abi(const InputFile *F); -} -} +} // namespace elf +} // namespace lld #endif diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/MainLoop.h b/contrib/llvm/tools/lldb/include/lldb/Host/MainLoop.h index a722348b8843..5ac145ff865b 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/MainLoop.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/MainLoop.h @@ -12,8 +12,8 @@ #include "lldb/Host/Config.h" #include "lldb/Host/MainLoopBase.h" - #include "llvm/ADT/DenseMap.h" +#include #if !HAVE_PPOLL && !HAVE_SYS_EVENT_H #define SIGNAL_POLLING_UNSUPPORTED 1 diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/PosixApi.h b/contrib/llvm/tools/lldb/include/lldb/Host/PosixApi.h index 02324307dc9e..d5c48dd6d170 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/PosixApi.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/PosixApi.h @@ -16,6 +16,8 @@ #if 
defined(_WIN32) #include "lldb/Host/windows/PosixApi.h" +#else +#include #endif #endif diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/SocketAddress.h b/contrib/llvm/tools/lldb/include/lldb/Host/SocketAddress.h index 8e9026ba962c..ebc6f4e57ee8 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/SocketAddress.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/SocketAddress.h @@ -151,6 +151,11 @@ class SocketAddress { //------------------------------------------------------------------ bool IsAnyAddr() const; + //------------------------------------------------------------------ + // Returns true if the socket is INADDR_LOOPBACK + //------------------------------------------------------------------ + bool IsLocalhost() const; + //------------------------------------------------------------------ // Direct access to all of the sockaddr structures //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h index 5f2157510c0a..9671d710fc02 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h @@ -33,8 +33,7 @@ class ResumeActionList; //------------------------------------------------------------------ // NativeProcessProtocol //------------------------------------------------------------------ -class NativeProcessProtocol - : public std::enable_shared_from_this { +class NativeProcessProtocol { friend class SoftwareBreakpoint; public: @@ -268,7 +267,7 @@ class NativeProcessProtocol /// A NativeProcessProtocol shared pointer if the operation succeeded or /// an error object if it failed. 
//------------------------------------------------------------------ - virtual llvm::Expected + virtual llvm::Expected> Launch(ProcessLaunchInfo &launch_info, NativeDelegate &native_delegate, MainLoop &mainloop) const = 0; @@ -292,7 +291,7 @@ class NativeProcessProtocol /// A NativeProcessProtocol shared pointer if the operation succeeded or /// an error object if it failed. //------------------------------------------------------------------ - virtual llvm::Expected + virtual llvm::Expected> Attach(lldb::pid_t pid, NativeDelegate &native_delegate, MainLoop &mainloop) const = 0; }; diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeThreadProtocol.h b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeThreadProtocol.h index 2e6c96a34cf5..d96f71311185 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeThreadProtocol.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeThreadProtocol.h @@ -23,7 +23,7 @@ namespace lldb_private { class NativeThreadProtocol : public std::enable_shared_from_this { public: - NativeThreadProtocol(NativeProcessProtocol *process, lldb::tid_t tid); + NativeThreadProtocol(NativeProcessProtocol &process, lldb::tid_t tid); virtual ~NativeThreadProtocol() {} @@ -46,7 +46,7 @@ class NativeThreadProtocol lldb::tid_t GetID() const { return m_tid; } - NativeProcessProtocolSP GetProcess(); + NativeProcessProtocol &GetProcess() { return m_process; } // --------------------------------------------------------------------- // Thread-specific watchpoints @@ -64,7 +64,7 @@ class NativeThreadProtocol virtual Status RemoveHardwareBreakpoint(lldb::addr_t addr) = 0; protected: - NativeProcessProtocolWP m_process_wp; + NativeProcessProtocol &m_process; lldb::tid_t m_tid; }; } diff --git a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandInterpreter.h b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandInterpreter.h index f47411079a3a..73bd7d6e6220 100644 --- 
a/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandInterpreter.h +++ b/contrib/llvm/tools/lldb/include/lldb/Interpreter/CommandInterpreter.h @@ -539,7 +539,7 @@ class CommandInterpreter : public Broadcaster, std::string m_repeat_command; // Stores the command that will be executed for // an empty command string. lldb::ScriptInterpreterSP m_script_interpreter_sp; - std::mutex m_script_interpreter_mutex; + std::recursive_mutex m_script_interpreter_mutex; lldb::IOHandlerSP m_command_io_handler_sp; char m_comment_char; bool m_batch_command_mode; diff --git a/contrib/llvm/tools/lldb/include/lldb/Utility/DataExtractor.h b/contrib/llvm/tools/lldb/include/lldb/Utility/DataExtractor.h index 58240d9a5268..4ef78c1af492 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Utility/DataExtractor.h +++ b/contrib/llvm/tools/lldb/include/lldb/Utility/DataExtractor.h @@ -15,6 +15,7 @@ #include "lldb/lldb-forward.h" // for DataBufferSP #include "lldb/lldb-types.h" +#include #include #include diff --git a/contrib/llvm/tools/lldb/include/lldb/lldb-private-forward.h b/contrib/llvm/tools/lldb/include/lldb/lldb-private-forward.h index 69a231132982..296facb1a0bd 100644 --- a/contrib/llvm/tools/lldb/include/lldb/lldb-private-forward.h +++ b/contrib/llvm/tools/lldb/include/lldb/lldb-private-forward.h @@ -30,10 +30,6 @@ class UnixSignals; // SP/WP decls. 
// --------------------------------------------------------------- typedef std::shared_ptr NativeBreakpointSP; -typedef std::shared_ptr - NativeProcessProtocolSP; -typedef std::weak_ptr - NativeProcessProtocolWP; typedef std::shared_ptr NativeRegisterContextSP; typedef std::shared_ptr diff --git a/contrib/llvm/tools/lldb/include/lldb/lldb-types.h b/contrib/llvm/tools/lldb/include/lldb/lldb-types.h index 07e9f5ac7161..fc445f55a9e5 100644 --- a/contrib/llvm/tools/lldb/include/lldb/lldb-types.h +++ b/contrib/llvm/tools/lldb/include/lldb/lldb-types.h @@ -13,16 +13,10 @@ #include "lldb/lldb-enumerations.h" #include "lldb/lldb-forward.h" -#include -#include #include //---------------------------------------------------------------------- // All host systems must define: -// lldb::condition_t The native condition type (or a substitute class) -// for conditions on the host system. -// lldb::mutex_t The native mutex type for mutex objects on the host -// system. // lldb::thread_t The native thread type for spawned threads on the // system // lldb::thread_arg_t The type of the one any only thread creation @@ -34,32 +28,22 @@ // #define LLDB_INVALID_PROCESS_ID ... // #define LLDB_INVALID_THREAD_ID ... // #define LLDB_INVALID_HOST_THREAD ... -// #define IS_VALID_LLDB_HOST_THREAD ... //---------------------------------------------------------------------- // TODO: Add a bunch of ifdefs to determine the host system and what // things should be defined. Currently MacOSX is being assumed by default // since that is what lldb was first developed for. 
-#ifndef _MSC_VER -#include -#include -#endif - #ifdef _WIN32 #include namespace lldb { -typedef void *mutex_t; -typedef void *condition_t; typedef void *rwlock_t; typedef void *process_t; // Process type is HANDLE typedef void *thread_t; // Host thread type typedef void *file_t; // Host file type -typedef void *pipe_t; // Host pipe type typedef unsigned int __w64 socket_t; // Host socket type -typedef uint32_t thread_key_t; typedef void *thread_arg_t; // Host thread argument type typedef unsigned thread_result_t; // Host thread result type typedef thread_result_t (*thread_func_t)(void *); // Host thread function type @@ -73,15 +57,11 @@ namespace lldb { //---------------------------------------------------------------------- // MacOSX Types //---------------------------------------------------------------------- -typedef ::pthread_mutex_t mutex_t; -typedef pthread_cond_t condition_t; typedef pthread_rwlock_t rwlock_t; typedef uint64_t process_t; // Process type is just a pid. typedef pthread_t thread_t; // Host thread type typedef int file_t; // Host file type -typedef int pipe_t; // Host pipe type typedef int socket_t; // Host socket type -typedef pthread_key_t thread_key_t; typedef void *thread_arg_t; // Host thread argument type typedef void *thread_result_t; // Host thread result type typedef void *(*thread_func_t)(void *); // Host thread function type @@ -100,10 +80,6 @@ typedef bool (*ExpressionCancelCallback)(ExpressionEvaluationPhase phase, #define LLDB_INVALID_PROCESS ((lldb::process_t)-1) #define LLDB_INVALID_HOST_THREAD ((lldb::thread_t)NULL) -#define IS_VALID_LLDB_HOST_THREAD(t) ((t) != LLDB_INVALID_HOST_THREAD) - -#define LLDB_INVALID_HOST_TIME \ - { 0, 0 } namespace lldb { typedef uint64_t addr_t; diff --git a/contrib/llvm/tools/lldb/source/Host/common/File.cpp b/contrib/llvm/tools/lldb/source/Host/common/File.cpp index 90a4462c6ca9..6ee4e894756b 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/File.cpp +++ 
b/contrib/llvm/tools/lldb/source/Host/common/File.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #endif #include "llvm/Support/ConvertUTF.h" diff --git a/contrib/llvm/tools/lldb/source/Host/common/Host.cpp b/contrib/llvm/tools/lldb/source/Host/common/Host.cpp index 29e5991d31aa..8248aa3c5118 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/Host.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/Host.cpp @@ -46,6 +46,7 @@ #endif // C++ Includes +#include // Other libraries and framework includes // Project includes diff --git a/contrib/llvm/tools/lldb/source/Host/common/MainLoop.cpp b/contrib/llvm/tools/lldb/source/Host/common/MainLoop.cpp index c0c4471e735f..d0e0d00a3151 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/MainLoop.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/MainLoop.cpp @@ -10,6 +10,7 @@ #include "llvm/Config/llvm-config.h" #include "lldb/Host/MainLoop.h" +#include "lldb/Host/PosixApi.h" #include "lldb/Utility/Status.h" #include #include diff --git a/contrib/llvm/tools/lldb/source/Host/common/NativeRegisterContext.cpp b/contrib/llvm/tools/lldb/source/Host/common/NativeRegisterContext.cpp index 2ca95d707963..629b0247422d 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/NativeRegisterContext.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/NativeRegisterContext.cpp @@ -345,17 +345,12 @@ Status NativeRegisterContext::ReadRegisterValueFromMemory( return error; } - NativeProcessProtocolSP process_sp(m_thread.GetProcess()); - if (!process_sp) { - error.SetErrorString("invalid process"); - return error; - } - + NativeProcessProtocol &process = m_thread.GetProcess(); uint8_t src[RegisterValue::kMaxRegisterByteSize]; // Read the memory size_t bytes_read; - error = process_sp->ReadMemory(src_addr, src, src_len, bytes_read); + error = process.ReadMemory(src_addr, src, src_len, bytes_read); if (error.Fail()) return error; @@ -374,7 +369,7 @@ Status NativeRegisterContext::ReadRegisterValueFromMemory( // order of the 
memory data doesn't match the process. For now we are assuming // they are the same. lldb::ByteOrder byte_order; - if (!process_sp->GetByteOrder(byte_order)) { + if (process.GetByteOrder(byte_order)) { error.SetErrorString("NativeProcessProtocol::GetByteOrder () failed"); return error; } @@ -392,41 +387,37 @@ Status NativeRegisterContext::WriteRegisterValueToMemory( Status error; - NativeProcessProtocolSP process_sp(m_thread.GetProcess()); - if (process_sp) { + NativeProcessProtocol &process = m_thread.GetProcess(); - // TODO: we might need to add a parameter to this function in case the byte - // order of the memory data doesn't match the process. For now we are - // assuming - // they are the same. - lldb::ByteOrder byte_order; - if (!process_sp->GetByteOrder(byte_order)) - return Status("NativeProcessProtocol::GetByteOrder () failed"); + // TODO: we might need to add a parameter to this function in case the byte + // order of the memory data doesn't match the process. For now we are + // assuming + // they are the same. 
+ lldb::ByteOrder byte_order; + if (!process.GetByteOrder(byte_order)) + return Status("NativeProcessProtocol::GetByteOrder () failed"); - const size_t bytes_copied = - reg_value.GetAsMemoryData(reg_info, dst, dst_len, byte_order, error); + const size_t bytes_copied = + reg_value.GetAsMemoryData(reg_info, dst, dst_len, byte_order, error); - if (error.Success()) { - if (bytes_copied == 0) { - error.SetErrorString("byte copy failed."); - } else { - size_t bytes_written; - error = - process_sp->WriteMemory(dst_addr, dst, bytes_copied, bytes_written); - if (error.Fail()) - return error; + if (error.Success()) { + if (bytes_copied == 0) { + error.SetErrorString("byte copy failed."); + } else { + size_t bytes_written; + error = process.WriteMemory(dst_addr, dst, bytes_copied, bytes_written); + if (error.Fail()) + return error; - if (bytes_written != bytes_copied) { - // This might happen if we read _some_ bytes but not all - error.SetErrorStringWithFormat("only wrote %" PRIu64 " of %" PRIu64 - " bytes", - static_cast(bytes_written), - static_cast(bytes_copied)); - } + if (bytes_written != bytes_copied) { + // This might happen if we read _some_ bytes but not all + error.SetErrorStringWithFormat("only wrote %" PRIu64 " of %" PRIu64 + " bytes", + static_cast(bytes_written), + static_cast(bytes_copied)); } } - } else - error.SetErrorString("invalid process"); + } return error; } diff --git a/contrib/llvm/tools/lldb/source/Host/common/NativeThreadProtocol.cpp b/contrib/llvm/tools/lldb/source/Host/common/NativeThreadProtocol.cpp index 29e25bbc5692..54ac96dd3c6f 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/NativeThreadProtocol.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/NativeThreadProtocol.cpp @@ -16,9 +16,9 @@ using namespace lldb; using namespace lldb_private; -NativeThreadProtocol::NativeThreadProtocol(NativeProcessProtocol *process, +NativeThreadProtocol::NativeThreadProtocol(NativeProcessProtocol &process, lldb::tid_t tid) - : 
m_process_wp(process->shared_from_this()), m_tid(tid) {} + : m_process(process), m_tid(tid) {} Status NativeThreadProtocol::ReadRegister(uint32_t reg, RegisterValue ®_value) { @@ -62,7 +62,3 @@ Status NativeThreadProtocol::RestoreAllRegisters(lldb::DataBufferSP &data_sp) { return Status("no register context"); return register_context_sp->ReadAllRegisterValues(data_sp); } - -NativeProcessProtocolSP NativeThreadProtocol::GetProcess() { - return m_process_wp.lock(); -} diff --git a/contrib/llvm/tools/lldb/source/Host/common/Socket.cpp b/contrib/llvm/tools/lldb/source/Host/common/Socket.cpp index 0df9dc02c70f..5490e9b30bda 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/Socket.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/Socket.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #endif #ifdef __linux__ diff --git a/contrib/llvm/tools/lldb/source/Host/common/SocketAddress.cpp b/contrib/llvm/tools/lldb/source/Host/common/SocketAddress.cpp index 41150fa7fd74..def3e0359f01 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/SocketAddress.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/SocketAddress.cpp @@ -317,6 +317,13 @@ bool SocketAddress::IsAnyAddr() const { : 0 == memcmp(&m_socket_addr.sa_ipv6.sin6_addr, &in6addr_any, 16); } +bool SocketAddress::IsLocalhost() const { + return (GetFamily() == AF_INET) + ? 
m_socket_addr.sa_ipv4.sin_addr.s_addr == htonl(INADDR_LOOPBACK) + : 0 == memcmp(&m_socket_addr.sa_ipv6.sin6_addr, &in6addr_loopback, + 16); +} + bool SocketAddress::operator==(const SocketAddress &rhs) const { if (GetFamily() != rhs.GetFamily()) return false; diff --git a/contrib/llvm/tools/lldb/source/Host/common/TCPSocket.cpp b/contrib/llvm/tools/lldb/source/Host/common/TCPSocket.cpp index c013334ce23a..a7af93f10a7f 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/TCPSocket.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/TCPSocket.cpp @@ -34,6 +34,7 @@ #define CLOSE_SOCKET closesocket typedef const char *set_socket_option_arg_type; #else +#include #define CLOSE_SOCKET ::close typedef const void *set_socket_option_arg_type; #endif diff --git a/contrib/llvm/tools/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp b/contrib/llvm/tools/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp index 105ef0f23d46..3797650105ce 100644 --- a/contrib/llvm/tools/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp +++ b/contrib/llvm/tools/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp @@ -30,6 +30,7 @@ #ifndef LLDB_DISABLE_POSIX #include +#include #endif // C++ Includes diff --git a/contrib/llvm/tools/lldb/source/Host/posix/FileSystem.cpp b/contrib/llvm/tools/lldb/source/Host/posix/FileSystem.cpp index e5a99e1aa754..3ece0677f991 100644 --- a/contrib/llvm/tools/lldb/source/Host/posix/FileSystem.cpp +++ b/contrib/llvm/tools/lldb/source/Host/posix/FileSystem.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef __linux__ #include #include diff --git a/contrib/llvm/tools/lldb/source/Host/posix/HostProcessPosix.cpp b/contrib/llvm/tools/lldb/source/Host/posix/HostProcessPosix.cpp index b5505dbec65b..3c5273f4bd3f 100644 --- a/contrib/llvm/tools/lldb/source/Host/posix/HostProcessPosix.cpp +++ b/contrib/llvm/tools/lldb/source/Host/posix/HostProcessPosix.cpp @@ -13,7 +13,9 @@ #include "llvm/ADT/STLExtras.h" +#include #include +#include 
using namespace lldb_private; diff --git a/contrib/llvm/tools/lldb/source/Host/posix/LockFilePosix.cpp b/contrib/llvm/tools/lldb/source/Host/posix/LockFilePosix.cpp index 2b7d548a021c..05423062bd44 100644 --- a/contrib/llvm/tools/lldb/source/Host/posix/LockFilePosix.cpp +++ b/contrib/llvm/tools/lldb/source/Host/posix/LockFilePosix.cpp @@ -10,6 +10,7 @@ #include "lldb/Host/posix/LockFilePosix.h" #include +#include using namespace lldb; using namespace lldb_private; diff --git a/contrib/llvm/tools/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/contrib/llvm/tools/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp index 0b40c24256ef..66c0229e0dab 100644 --- a/contrib/llvm/tools/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp +++ b/contrib/llvm/tools/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp @@ -19,8 +19,10 @@ #include #include #include +#include #include +#include #ifdef __ANDROID__ #include diff --git a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp index 075f2e7b7bd1..986be7ffbf89 100644 --- a/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/contrib/llvm/tools/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2475,7 +2475,7 @@ void CommandInterpreter::HandleCommandsFromFile( } ScriptInterpreter *CommandInterpreter::GetScriptInterpreter(bool can_create) { - std::lock_guard locker(m_script_interpreter_mutex); + std::lock_guard locker(m_script_interpreter_mutex); if (!m_script_interpreter_sp) { if (!can_create) return nullptr; diff --git a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/AuxVector.cpp b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/AuxVector.cpp index 5dbb3bb4ef7e..7dd2b57da0cb 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/AuxVector.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/AuxVector.cpp @@ -7,24 +7,12 @@ // 
//===----------------------------------------------------------------------===// -// C Includes -#include -#include -#include - -// C++ Includes -// Other libraries and framework includes +#include "AuxVector.h" #include "lldb/Target/Process.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/Log.h" -#if defined(__linux__) || defined(__FreeBSD__) -#include "Plugins/Process/elf-core/ProcessElfCore.h" -#endif - -#include "AuxVector.h" - using namespace lldb; using namespace lldb_private; diff --git a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 696bdf7e030d..bce0eaf6d57e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -451,16 +451,16 @@ void ClangModulesDeclVendorImpl::ForEachMacro( bool first_arg = true; - for (clang::MacroInfo::arg_iterator ai = macro_info->arg_begin(), - ae = macro_info->arg_end(); - ai != ae; ++ai) { + for (auto pi = macro_info->param_begin(), + pe = macro_info->param_end(); + pi != pe; ++pi) { if (!first_arg) { macro_expansion.append(", "); } else { first_arg = false; } - macro_expansion.append((*ai)->getName().str()); + macro_expansion.append((*pi)->getName().str()); } if (macro_info->isC99Varargs()) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp index 5f0596cc9ad2..88bdd68ff301 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp @@ -274,6 +274,28 @@ bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { return true; } +bool 
CPlusPlusNameParser::ConsumeLambda() { + Bookmark start_position = SetBookmark(); + if (!ConsumeToken(tok::l_brace)) { + return false; + } + constexpr llvm::StringLiteral g_lambda("lambda"); + if (HasMoreTokens() && Peek().is(tok::raw_identifier) && + Peek().getRawIdentifier() == g_lambda) { + // Put the matched brace back so we can use ConsumeBrackets + TakeBack(); + } else { + return false; + } + + if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { + return false; + } + + start_position.Remove(); + return true; +} + bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, tok::TokenKind right) { Bookmark start_position = SetBookmark(); @@ -502,6 +524,15 @@ CPlusPlusNameParser::ParseFullNameImpl() { state = State::AfterTwoColons; break; } + case tok::l_brace: + if (state == State::Beginning || state == State::AfterTwoColons) { + if (ConsumeLambda()) { + state = State::AfterIdentifier; + break; + } + } + continue_parsing = false; + break; case tok::coloncolon: // Type nesting delimiter. 
if (state != State::Beginning && state != State::AfterIdentifier && state != State::AfterTemplate) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h index f936fb787c94..fe1d46f32c17 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h @@ -143,6 +143,9 @@ class CPlusPlusNameParser { // Consumes '(anonymous namespace)' bool ConsumeAnonymousNamespace(); + // Consumes '{lambda ...}' + bool ConsumeLambda(); + // Consumes operator declaration like 'operator *' or 'operator delete []' bool ConsumeOperator(); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/contrib/llvm/tools/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index 645bfdfa770d..759ec7fd1d29 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -23,6 +23,7 @@ #include "lldb/Host/ConnectionFileDescriptor.h" #include "lldb/Host/Host.h" #include "lldb/Host/HostInfo.h" +#include "lldb/Host/PosixApi.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "lldb/Utility/FileSpec.h" diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp index b9ef02efa65d..388989a21f76 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp @@ -64,7 +64,7 @@ static Status EnsureFDFlags(int fd, int flags) { // Public Static Methods // ----------------------------------------------------------------------------- 
-llvm::Expected +llvm::Expected> NativeProcessNetBSD::Factory::Launch(ProcessLaunchInfo &launch_info, NativeDelegate &native_delegate, MainLoop &mainloop) const { @@ -101,24 +101,25 @@ NativeProcessNetBSD::Factory::Launch(ProcessLaunchInfo &launch_info, LLDB_LOG(log, "pid = {0:x}, detected architecture {1}", pid, arch.GetArchitectureName()); - std::shared_ptr process_sp(new NativeProcessNetBSD( + std::unique_ptr process_up(new NativeProcessNetBSD( pid, launch_info.GetPTY().ReleaseMasterFileDescriptor(), native_delegate, arch, mainloop)); - status = process_sp->ReinitializeThreads(); + status = process_up->ReinitializeThreads(); if (status.Fail()) return status.ToError(); - for (const auto &thread_sp : process_sp->m_threads) { + for (const auto &thread_sp : process_up->m_threads) { static_pointer_cast(thread_sp)->SetStoppedBySignal( SIGSTOP); } - process_sp->SetState(StateType::eStateStopped); + process_up->SetState(StateType::eStateStopped); - return process_sp; + return std::move(process_up); } -llvm::Expected NativeProcessNetBSD::Factory::Attach( +llvm::Expected> +NativeProcessNetBSD::Factory::Attach( lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate, MainLoop &mainloop) const { Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); @@ -130,14 +131,14 @@ llvm::Expected NativeProcessNetBSD::Factory::Attach( if (!status.Success()) return status.ToError(); - std::shared_ptr process_sp( + std::unique_ptr process_up( new NativeProcessNetBSD(pid, -1, native_delegate, arch, mainloop)); - status = process_sp->Attach(); + status = process_up->Attach(); if (!status.Success()) return status.ToError(); - return process_sp; + return std::move(process_up); } // ----------------------------------------------------------------------------- @@ -787,7 +788,7 @@ NativeThreadNetBSDSP NativeProcessNetBSD::AddThread(lldb::tid_t thread_id) { if (m_threads.empty()) SetCurrentThreadID(thread_id); - auto thread_sp = std::make_shared(this, thread_id); 
+ auto thread_sp = std::make_shared(*this, thread_id); m_threads.push_back(thread_sp); return thread_sp; } diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h index 34b892f1fc88..2cbd5e30ab23 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h @@ -34,11 +34,11 @@ class NativeProcessNetBSD : public NativeProcessProtocol { public: class Factory : public NativeProcessProtocol::Factory { public: - llvm::Expected + llvm::Expected> Launch(ProcessLaunchInfo &launch_info, NativeDelegate &native_delegate, MainLoop &mainloop) const override; - llvm::Expected + llvm::Expected> Attach(lldb::pid_t pid, NativeDelegate &native_delegate, MainLoop &mainloop) const override; }; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.cpp index b442fc3462cc..dde86880c41a 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.cpp @@ -104,15 +104,9 @@ Status NativeRegisterContextNetBSD::DoWriteDBR(void *buf) { } NativeProcessNetBSD &NativeRegisterContextNetBSD::GetProcess() { - auto process_sp = - std::static_pointer_cast(m_thread.GetProcess()); - assert(process_sp); - return *process_sp; + return static_cast(m_thread.GetProcess()); } ::pid_t NativeRegisterContextNetBSD::GetProcessPid() { - NativeProcessNetBSD &process = GetProcess(); - lldb::pid_t pid = process.GetID(); - - return pid; + return GetProcess().GetID(); } diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp index 
8a16431b016d..1fd7400bf800 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp @@ -24,7 +24,7 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::process_netbsd; -NativeThreadNetBSD::NativeThreadNetBSD(NativeProcessNetBSD *process, +NativeThreadNetBSD::NativeThreadNetBSD(NativeProcessNetBSD &process, lldb::tid_t tid) : NativeThreadProtocol(process, tid), m_state(StateType::eStateInvalid), m_stop_info(), m_reg_context_sp(), m_stop_description() {} @@ -144,12 +144,8 @@ NativeRegisterContextSP NativeThreadNetBSD::GetRegisterContext() { if (m_reg_context_sp) return m_reg_context_sp; - NativeProcessProtocolSP m_process_sp = m_process_wp.lock(); - if (!m_process_sp) - return NativeRegisterContextSP(); - ArchSpec target_arch; - if (!m_process_sp->GetArchitecture(target_arch)) + if (!m_process.GetArchitecture(target_arch)) return NativeRegisterContextSP(); const uint32_t concrete_frame_idx = 0; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.h b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.h index dcd360cdd310..1e3f587be5f5 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.h @@ -12,6 +12,7 @@ #include "lldb/Host/common/NativeThreadProtocol.h" +#include #include #include @@ -24,7 +25,7 @@ class NativeThreadNetBSD : public NativeThreadProtocol { friend class NativeProcessNetBSD; public: - NativeThreadNetBSD(NativeProcessNetBSD *process, lldb::tid_t tid); + NativeThreadNetBSD(NativeProcessNetBSD &process, lldb::tid_t tid); // --------------------------------------------------------------------- // NativeThreadProtocol Interface diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp 
b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index a7fe4ee3b147..9294359dbef1 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -29,6 +29,7 @@ #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" #include "lldb/Host/HostInfo.h" +#include "lldb/Host/PosixApi.h" #include "lldb/Host/common/NativeProcessProtocol.h" #include "lldb/Host/common/NativeRegisterContext.h" #include "lldb/Host/common/NativeThreadProtocol.h" @@ -239,7 +240,7 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() { { std::lock_guard guard(m_debugged_process_mutex); - assert(!m_debugged_process_sp && "lldb-server creating debugged " + assert(!m_debugged_process_up && "lldb-server creating debugged " "process but one already exists"); auto process_or = m_process_factory.Launch(m_process_launch_info, *this, m_mainloop); @@ -250,7 +251,7 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() { m_process_launch_info.GetArguments().GetArgumentAtIndex(0), status); return status; } - m_debugged_process_sp = *process_or; + m_debugged_process_up = std::move(*process_or); } // Handle mirroring of inferior stdout/stderr over the gdb-remote protocol @@ -263,14 +264,13 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() { // nullptr means it's not redirected to file or pty (in case of LLGS local) // at least one of stdio will be transferred pty<->gdb-remote // we need to give the pty master handle to this object to read and/or write - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " setting up stdout/stderr redirection via $O gdb-remote commands", - __FUNCTION__, m_debugged_process_sp->GetID()); + LLDB_LOG(log, + "pid = {0}: setting up stdout/stderr redirection via $O " + "gdb-remote commands", + m_debugged_process_up->GetID()); // Setup 
stdout/stderr mapping from inferior to $O - auto terminal_fd = m_debugged_process_sp->GetTerminalFileDescriptor(); + auto terminal_fd = m_debugged_process_up->GetTerminalFileDescriptor(); if (terminal_fd >= 0) { if (log) log->Printf("ProcessGDBRemoteCommunicationServerLLGS::%s setting " @@ -286,16 +286,15 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() { __FUNCTION__, terminal_fd); } } else { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " skipping stdout/stderr redirection via $O: inferior will " - "communicate over client-provided file descriptors", - __FUNCTION__, m_debugged_process_sp->GetID()); + LLDB_LOG(log, + "pid = {0} skipping stdout/stderr redirection via $O: inferior " + "will communicate over client-provided file descriptors", + m_debugged_process_up->GetID()); } printf("Launched '%s' as process %" PRIu64 "...\n", m_process_launch_info.GetArguments().GetArgumentAtIndex(0), - m_debugged_process_sp->GetID()); + m_debugged_process_up->GetID()); return Status(); } @@ -308,12 +307,12 @@ Status GDBRemoteCommunicationServerLLGS::AttachToProcess(lldb::pid_t pid) { // Before we try to attach, make sure we aren't already monitoring something // else. - if (m_debugged_process_sp && - m_debugged_process_sp->GetID() != LLDB_INVALID_PROCESS_ID) + if (m_debugged_process_up && + m_debugged_process_up->GetID() != LLDB_INVALID_PROCESS_ID) return Status("cannot attach to a process %" PRIu64 " when another process with pid %" PRIu64 " is being debugged.", - pid, m_debugged_process_sp->GetID()); + pid, m_debugged_process_up->GetID()); // Try to attach. auto process_or = m_process_factory.Attach(pid, *this, m_mainloop); @@ -323,10 +322,10 @@ Status GDBRemoteCommunicationServerLLGS::AttachToProcess(lldb::pid_t pid) { status); return status; } - m_debugged_process_sp = *process_or; + m_debugged_process_up = std::move(*process_or); // Setup stdout/stderr mapping from inferior. 
- auto terminal_fd = m_debugged_process_sp->GetTerminalFileDescriptor(); + auto terminal_fd = m_debugged_process_up->GetTerminalFileDescriptor(); if (terminal_fd >= 0) { if (log) log->Printf("ProcessGDBRemoteCommunicationServerLLGS::%s setting " @@ -597,18 +596,15 @@ GDBRemoteCommunicationServerLLGS::SendStopReplyPacketForThread( Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD)); // Ensure we have a debugged process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(50); - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s preparing packet for pid %" PRIu64 - " tid %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID(), tid); + LLDB_LOG(log, "preparing packet for pid {0} tid {1}", + m_debugged_process_up->GetID(), tid); // Ensure we can get info on the given thread. - NativeThreadProtocolSP thread_sp(m_debugged_process_sp->GetThreadByID(tid)); + NativeThreadProtocolSP thread_sp(m_debugged_process_up->GetThreadByID(tid)); if (!thread_sp) return SendErrorResponse(51); @@ -629,13 +625,11 @@ GDBRemoteCommunicationServerLLGS::SendStopReplyPacketForThread( // Output the T packet with the thread response.PutChar('T'); int signum = tid_stop_info.details.signal.signo; - if (log) { - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " tid %" PRIu64 - " got signal signo = %d, reason = %d, exc_type = %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID(), tid, signum, - tid_stop_info.reason, tid_stop_info.details.exception.type); - } + LLDB_LOG( + log, + "pid {0}, tid {1}, got signal signo = {2}, reason = {3}, exc_type = {4}", + m_debugged_process_up->GetID(), tid, signum, int(tid_stop_info.reason), + tid_stop_info.details.exception.type); // Print the signal number. 
response.PutHex8(signum & 0xff); @@ -673,9 +667,9 @@ GDBRemoteCommunicationServerLLGS::SendStopReplyPacketForThread( uint32_t thread_index = 0; NativeThreadProtocolSP listed_thread_sp; for (listed_thread_sp = - m_debugged_process_sp->GetThreadAtIndex(thread_index); + m_debugged_process_up->GetThreadAtIndex(thread_index); listed_thread_sp; ++thread_index, - listed_thread_sp = m_debugged_process_sp->GetThreadAtIndex( + listed_thread_sp = m_debugged_process_up->GetThreadAtIndex( thread_index)) { if (thread_index > 0) response.PutChar(','); @@ -692,24 +686,23 @@ GDBRemoteCommunicationServerLLGS::SendStopReplyPacketForThread( if (thread_index > 0) { const bool threads_with_valid_stop_info_only = true; JSONArray::SP threads_info_sp = GetJSONThreadsInfo( - *m_debugged_process_sp, threads_with_valid_stop_info_only); + *m_debugged_process_up, threads_with_valid_stop_info_only); if (threads_info_sp) { response.PutCString("jstopinfo:"); StreamString unescaped_response; threads_info_sp->Write(unescaped_response); response.PutCStringAsRawHex8(unescaped_response.GetData()); response.PutChar(';'); - } else if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to prepare a " - "jstopinfo field for pid %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); + } else + LLDB_LOG(log, "failed to prepare a jstopinfo field for pid {0}", + m_debugged_process_up->GetID()); } uint32_t i = 0; response.PutCString("thread-pcs"); char delimiter = ':'; for (NativeThreadProtocolSP thread_sp; - (thread_sp = m_debugged_process_sp->GetThreadAtIndex(i)) != nullptr; + (thread_sp = m_debugged_process_up->GetThreadAtIndex(i)) != nullptr; ++i) { NativeRegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext(); if (!reg_ctx_sp) @@ -1069,8 +1062,8 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceStart( StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); // Fail if we don't have a current process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); if (!packet.ConsumeFront("jTraceStart:")) @@ -1120,7 +1113,7 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceStart( Status error; lldb::user_id_t uid = LLDB_INVALID_UID; - uid = m_debugged_process_sp->StartTrace(options, error); + uid = m_debugged_process_up->StartTrace(options, error); LLDB_LOG(log, "uid is {0} , error is {1}", uid, error.GetError()); if (error.Fail()) return SendErrorResponse(error); @@ -1134,8 +1127,8 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_jTraceStop( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); if (!packet.ConsumeFront("jTraceStop:")) @@ -1157,7 +1150,7 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceStop( json_dict->GetValueForKeyAsInteger("threadid", tid); - Status error = m_debugged_process_sp->StopTrace(uid, tid); + Status error = m_debugged_process_up->StopTrace(uid, tid); if (error.Fail()) return SendErrorResponse(error); @@ -1170,8 +1163,8 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceConfigRead( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); if (!packet.ConsumeFront("jTraceConfigRead:")) @@ -1200,7 +1193,7 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceConfigRead( StreamGDBRemote response; options.setThreadID(threadid); - Status error = m_debugged_process_sp->GetTraceConfig(uid, options); + Status error = m_debugged_process_up->GetTraceConfig(uid, options); if (error.Fail()) return SendErrorResponse(error); @@ -1228,8 +1221,8 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceRead( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); enum PacketType { MetaData, BufferData }; @@ -1274,9 +1267,9 @@ GDBRemoteCommunicationServerLLGS::Handle_jTraceRead( llvm::MutableArrayRef buf(buffer.get(), byte_count); if (tracetype == BufferData) - error = m_debugged_process_sp->GetData(uid, tid, buf, offset); + error = m_debugged_process_up->GetData(uid, tid, buf, offset); else if (tracetype == MetaData) - error = m_debugged_process_sp->GetMetaData(uid, tid, buf, offset); + error = m_debugged_process_up->GetMetaData(uid, tid, buf, offset); if (error.Fail()) return SendErrorResponse(error); @@ -1293,11 +1286,11 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qProcessInfo( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); - lldb::pid_t pid = m_debugged_process_sp->GetID(); + lldb::pid_t pid = m_debugged_process_up->GetID(); if (pid == LLDB_INVALID_PROCESS_ID) return SendErrorResponse(1); @@ -1314,16 +1307,16 @@ GDBRemoteCommunicationServerLLGS::Handle_qProcessInfo( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qC(StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); // Make sure we set the current thread so g and p packets return // the data the gdb will expect. - lldb::tid_t tid = m_debugged_process_sp->GetCurrentThreadID(); + lldb::tid_t tid = m_debugged_process_up->GetCurrentThreadID(); SetCurrentThreadID(tid); - NativeThreadProtocolSP thread_sp = m_debugged_process_sp->GetCurrentThread(); + NativeThreadProtocolSP thread_sp = m_debugged_process_up->GetCurrentThread(); if (!thread_sp) return SendErrorResponse(69); @@ -1339,20 +1332,15 @@ GDBRemoteCommunicationServerLLGS::Handle_k(StringExtractorGDBRemote &packet) { StopSTDIOForwarding(); - if (!m_debugged_process_sp) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s No debugged process found.", - __FUNCTION__); + if (!m_debugged_process_up) { + LLDB_LOG(log, "No debugged process found."); return PacketResult::Success; } - Status error = m_debugged_process_sp->Kill(); - if (error.Fail() && log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s Failed to kill debugged " - "process %" PRIu64 ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + Status error = m_debugged_process_up->Kill(); + if 
(error.Fail()) + LLDB_LOG(log, "Failed to kill debugged process {0}: {1}", + m_debugged_process_up->GetID(), error); // No OK response for kill packet. // return SendOKResponse (); @@ -1400,7 +1388,7 @@ GDBRemoteCommunicationServerLLGS::Handle_C(StringExtractorGDBRemote &packet) { log->Printf("GDBRemoteCommunicationServerLLGS::%s called", __FUNCTION__); // Ensure we have a native process. - if (!m_debugged_process_sp) { + if (!m_debugged_process_up) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s no debugged process " "shared pointer", @@ -1453,26 +1441,20 @@ GDBRemoteCommunicationServerLLGS::Handle_C(StringExtractorGDBRemote &packet) { } else { // Send the signal to the process since we weren't targeting a specific // continue thread with the signal. - error = m_debugged_process_sp->Signal(signo); + error = m_debugged_process_up->Signal(signo); if (error.Fail()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to send " - "signal for process %" PRIu64 ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "failed to send signal for process {0}: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x52); } } // Resume the threads. - error = m_debugged_process_sp->Resume(resume_actions); + error = m_debugged_process_up->Resume(resume_actions); if (error.Fail()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to resume " - "threads for process %" PRIu64 ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "failed to resume threads for process {0}: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x38); } @@ -1492,15 +1474,13 @@ GDBRemoteCommunicationServerLLGS::Handle_c(StringExtractorGDBRemote &packet) { // For now just support all continue. 
const bool has_continue_address = (packet.GetBytesLeft() > 0); if (has_continue_address) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s not implemented for " - "c{address} variant [%s remains]", - __FUNCTION__, packet.Peek()); + LLDB_LOG(log, "not implemented for c[address] variant [{0} remains]", + packet.Peek()); return SendUnimplementedResponse(packet.GetStringRef().c_str()); } // Ensure we have a native process. - if (!m_debugged_process_sp) { + if (!m_debugged_process_up) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s no debugged process " "shared pointer", @@ -1511,22 +1491,14 @@ GDBRemoteCommunicationServerLLGS::Handle_c(StringExtractorGDBRemote &packet) { // Build the ResumeActionList ResumeActionList actions(StateType::eStateRunning, 0); - Status error = m_debugged_process_sp->Resume(actions); + Status error = m_debugged_process_up->Resume(actions); if (error.Fail()) { - if (log) { - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s c failed for process %" PRIu64 - ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), error.AsCString()); - } + LLDB_LOG(log, "c failed for process {0}: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(GDBRemoteServerError::eErrorResume); } - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s continued process %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); - + LLDB_LOG(log, "continued process {0}", m_debugged_process_up->GetID()); // No response required from continue. return PacketResult::Success; } @@ -1570,11 +1542,8 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont( } // Ensure we have a native process. 
- if (!m_debugged_process_sp) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s no debugged process " - "shared pointer", - __FUNCTION__); + if (!m_debugged_process_up) { + LLDB_LOG(log, "no debugged process"); return SendErrorResponse(0x36); } @@ -1635,44 +1604,30 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont( thread_actions.Append(thread_action); } - Status error = m_debugged_process_sp->Resume(thread_actions); + Status error = m_debugged_process_up->Resume(thread_actions); if (error.Fail()) { - if (log) { - log->Printf("GDBRemoteCommunicationServerLLGS::%s vCont failed for " - "process %" PRIu64 ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); - } + LLDB_LOG(log, "vCont failed for process {0}: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(GDBRemoteServerError::eErrorResume); } - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s continued process %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); - + LLDB_LOG(log, "continued process {0}", m_debugged_process_up->GetID()); // No response required from vCont. 
return PacketResult::Success; } void GDBRemoteCommunicationServerLLGS::SetCurrentThreadID(lldb::tid_t tid) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s setting current thread " - "id to %" PRIu64, - __FUNCTION__, tid); + LLDB_LOG(log, "setting current thread id to {0}", tid); m_current_tid = tid; - if (m_debugged_process_sp) - m_debugged_process_sp->SetCurrentThreadID(m_current_tid); + if (m_debugged_process_up) + m_debugged_process_up->SetCurrentThreadID(m_current_tid); } void GDBRemoteCommunicationServerLLGS::SetContinueThreadID(lldb::tid_t tid) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s setting continue thread " - "id to %" PRIu64, - __FUNCTION__, tid); + LLDB_LOG(log, "setting continue thread id to {0}", tid); m_continue_tid = tid; } @@ -1683,10 +1638,10 @@ GDBRemoteCommunicationServerLLGS::Handle_stop_reason( // Handle the $? gdbremote command. // If no process, indicate error - if (!m_debugged_process_sp) + if (!m_debugged_process_up) return SendErrorResponse(02); - return SendStopReasonForState(m_debugged_process_sp->GetState()); + return SendStopReasonForState(m_debugged_process_up->GetState()); } GDBRemoteCommunication::PacketResult @@ -1707,7 +1662,7 @@ GDBRemoteCommunicationServerLLGS::SendStopReasonForState( case eStateSuspended: case eStateStopped: case eStateCrashed: { - lldb::tid_t tid = m_debugged_process_sp->GetCurrentThreadID(); + lldb::tid_t tid = m_debugged_process_up->GetCurrentThreadID(); // Make sure we set the current thread so g and p packets return // the data the gdb will expect. 
SetCurrentThreadID(tid); @@ -1717,15 +1672,11 @@ GDBRemoteCommunicationServerLLGS::SendStopReasonForState( case eStateInvalid: case eStateUnloaded: case eStateExited: - return SendWResponse(m_debugged_process_sp.get()); + return SendWResponse(m_debugged_process_up.get()); default: - if (log) { - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - ", current state reporting not handled: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - StateAsCString(process_state)); - } + LLDB_LOG(log, "pid {0}, current state reporting not handled: {1}", + m_debugged_process_up->GetID(), process_state); break; } @@ -1736,12 +1687,12 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qRegisterInfo( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(68); // Ensure we have a thread. - NativeThreadProtocolSP thread_sp(m_debugged_process_sp->GetThreadAtIndex(0)); + NativeThreadProtocolSP thread_sp(m_debugged_process_up->GetThreadAtIndex(0)); if (!thread_sp) return SendErrorResponse(69); @@ -1945,47 +1896,33 @@ GDBRemoteCommunicationServerLLGS::Handle_qfThreadInfo( Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s() no process (%s), " - "returning OK", - __FUNCTION__, - m_debugged_process_sp ? "invalid process id" - : "null m_debugged_process_sp"); + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { + LLDB_LOG(log, "no process ({0}), returning OK", + m_debugged_process_up ? 
"invalid process id" + : "null m_debugged_process_up"); return SendOKResponse(); } StreamGDBRemote response; response.PutChar('m'); - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s() starting thread iteration", - __FUNCTION__); - + LLDB_LOG(log, "starting thread iteration"); NativeThreadProtocolSP thread_sp; uint32_t thread_index; for (thread_index = 0, - thread_sp = m_debugged_process_sp->GetThreadAtIndex(thread_index); + thread_sp = m_debugged_process_up->GetThreadAtIndex(thread_index); thread_sp; ++thread_index, - thread_sp = m_debugged_process_sp->GetThreadAtIndex(thread_index)) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s() iterated thread %" PRIu32 - "(%s, tid=0x%" PRIx64 ")", - __FUNCTION__, thread_index, thread_sp ? "is not null" : "null", - thread_sp ? thread_sp->GetID() : LLDB_INVALID_THREAD_ID); + thread_sp = m_debugged_process_up->GetThreadAtIndex(thread_index)) { + LLDB_LOG(log, "iterated thread {0}({1}, tid={2})", thread_index, + thread_sp ? "is not null" : "null", + thread_sp ? thread_sp->GetID() : LLDB_INVALID_THREAD_ID); if (thread_index > 0) response.PutChar(','); response.Printf("%" PRIx64, thread_sp->GetID()); } - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s() finished thread iteration", - __FUNCTION__); - + LLDB_LOG(log, "finished thread iteration"); return SendPacketNoLock(response.GetString()); } @@ -2026,11 +1963,10 @@ GDBRemoteCommunicationServerLLGS::Handle_p(StringExtractorGDBRemote &packet) { // Get the thread's register context. 
NativeRegisterContextSP reg_context_sp(thread_sp->GetRegisterContext()); if (!reg_context_sp) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " tid %" PRIu64 - " failed, no register context available for the thread", - __FUNCTION__, m_debugged_process_sp->GetID(), thread_sp->GetID()); + LLDB_LOG( + log, + "pid {0} tid {1} failed, no register context available for the thread", + m_debugged_process_up->GetID(), thread_sp->GetID()); return SendErrorResponse(0x15); } @@ -2113,8 +2049,8 @@ GDBRemoteCommunicationServerLLGS::Handle_P(StringExtractorGDBRemote &packet) { // Get process architecture. ArchSpec process_arch; - if (!m_debugged_process_sp || - !m_debugged_process_sp->GetArchitecture(process_arch)) { + if (!m_debugged_process_up || + !m_debugged_process_up->GetArchitecture(process_arch)) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to retrieve " "inferior architecture", @@ -2143,7 +2079,7 @@ GDBRemoteCommunicationServerLLGS::Handle_P(StringExtractorGDBRemote &packet) { log->Printf( "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " tid %" PRIu64 " failed, no register context available for the thread", - __FUNCTION__, m_debugged_process_sp->GetID(), thread_sp->GetID()); + __FUNCTION__, m_debugged_process_up->GetID(), thread_sp->GetID()); return SendErrorResponse(0x15); } @@ -2197,8 +2133,8 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); // Fail if we don't have a current process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2241,7 +2177,7 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { // Ensure we have the given thread when not specifying -1 (all threads) or 0 // (any thread). if (tid != LLDB_INVALID_THREAD_ID && tid != 0) { - NativeThreadProtocolSP thread_sp(m_debugged_process_sp->GetThreadByID(tid)); + NativeThreadProtocolSP thread_sp(m_debugged_process_up->GetThreadByID(tid)); if (!thread_sp) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s failed, tid %" PRIu64 @@ -2275,8 +2211,8 @@ GDBRemoteCommunicationServerLLGS::Handle_I(StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2311,30 +2247,21 @@ GDBRemoteCommunicationServerLLGS::Handle_interrupt( Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD)); // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s failed, no process available", - __FUNCTION__); + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { + LLDB_LOG(log, "failed, no process available"); return SendErrorResponse(0x15); } // Interrupt the process. 
- Status error = m_debugged_process_sp->Interrupt(); + Status error = m_debugged_process_up->Interrupt(); if (error.Fail()) { - if (log) { - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s failed for process %" PRIu64 - ": %s", - __FUNCTION__, m_debugged_process_sp->GetID(), error.AsCString()); - } + LLDB_LOG(log, "failed for process {0}: {1}", m_debugged_process_up->GetID(), + error); return SendErrorResponse(GDBRemoteServerError::eErrorResume); } - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s stopped process %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); + LLDB_LOG(log, "stopped process {0}", m_debugged_process_up->GetID()); // No response required from stop all. return PacketResult::Success; @@ -2345,8 +2272,8 @@ GDBRemoteCommunicationServerLLGS::Handle_memory_read( StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2388,13 +2315,13 @@ GDBRemoteCommunicationServerLLGS::Handle_memory_read( // Retrieve the process memory. size_t bytes_read = 0; - Status error = m_debugged_process_sp->ReadMemoryWithoutTrap( + Status error = m_debugged_process_up->ReadMemoryWithoutTrap( read_addr, &buf[0], byte_count, bytes_read); if (error.Fail()) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " mem 0x%" PRIx64 ": failed to read. 
Error: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), read_addr, + __FUNCTION__, m_debugged_process_up->GetID(), read_addr, error.AsCString()); return SendErrorResponse(0x08); } @@ -2403,7 +2330,7 @@ GDBRemoteCommunicationServerLLGS::Handle_memory_read( if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " mem 0x%" PRIx64 ": read 0 of %" PRIu64 " requested bytes", - __FUNCTION__, m_debugged_process_sp->GetID(), read_addr, + __FUNCTION__, m_debugged_process_up->GetID(), read_addr, byte_count); return SendErrorResponse(0x08); } @@ -2426,8 +2353,8 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_M(StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2455,10 +2382,7 @@ GDBRemoteCommunicationServerLLGS::Handle_M(StringExtractorGDBRemote &packet) { const uint64_t byte_count = packet.GetHexMaxU64(false, 0); if (byte_count == 0) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s nothing to write: " - "zero-length packet", - __FUNCTION__); + LLDB_LOG(log, "nothing to write: zero-length packet"); return PacketResult::Success; } @@ -2476,12 +2400,11 @@ GDBRemoteCommunicationServerLLGS::Handle_M(StringExtractorGDBRemote &packet) { StreamGDBRemote response; const uint64_t convert_count = packet.GetHexBytes(buf, 0); if (convert_count != byte_count) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " mem 0x%" PRIx64 ": asked to write %" PRIu64 - " bytes, but only found %" PRIu64 " to convert.", - __FUNCTION__, m_debugged_process_sp->GetID(), write_addr, - byte_count, convert_count); + LLDB_LOG(log, + "pid {0} mem {1:x}: asked to write {2} bytes, but only 
found {3} " + "to convert.", + m_debugged_process_up->GetID(), write_addr, byte_count, + convert_count); return SendIllFormedResponse(packet, "M content byte length specified did " "not match hex-encoded content " "length"); @@ -2489,23 +2412,17 @@ GDBRemoteCommunicationServerLLGS::Handle_M(StringExtractorGDBRemote &packet) { // Write the process memory. size_t bytes_written = 0; - Status error = m_debugged_process_sp->WriteMemory(write_addr, &buf[0], + Status error = m_debugged_process_up->WriteMemory(write_addr, &buf[0], byte_count, bytes_written); if (error.Fail()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " mem 0x%" PRIx64 ": failed to write. Error: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), write_addr, - error.AsCString()); + LLDB_LOG(log, "pid {0} mem {1:x}: failed to write. Error: {2}", + m_debugged_process_up->GetID(), write_addr, error); return SendErrorResponse(0x09); } if (bytes_written == 0) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " mem 0x%" PRIx64 ": wrote 0 of %" PRIu64 " requested bytes", - __FUNCTION__, m_debugged_process_sp->GetID(), write_addr, - byte_count); + LLDB_LOG(log, "pid {0} mem {1:x}: wrote 0 of {2} requested bytes", + m_debugged_process_up->GetID(), write_addr, byte_count); return SendErrorResponse(0x09); } @@ -2525,8 +2442,8 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemoryRegionInfoSupported( // Ensure we have a process running; otherwise, we can't figure this out // since we won't have a NativeProcessProtocol. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2537,7 +2454,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemoryRegionInfoSupported( // Test if we can get any region back when asking for the region around NULL. MemoryRegionInfo region_info; const Status error = - m_debugged_process_sp->GetMemoryRegionInfo(0, region_info); + m_debugged_process_up->GetMemoryRegionInfo(0, region_info); if (error.Fail()) { // We don't support memory region info collection for this // NativeProcessProtocol. @@ -2553,8 +2470,8 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemoryRegionInfo( Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); // Ensure we have a process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2575,7 +2492,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemoryRegionInfo( // Get the memory region info for the target address. MemoryRegionInfo region_info; const Status error = - m_debugged_process_sp->GetMemoryRegionInfo(read_addr, region_info); + m_debugged_process_up->GetMemoryRegionInfo(read_addr, region_info); if (error.Fail()) { // Return the error message. @@ -2619,13 +2536,10 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemoryRegionInfo( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_Z(StringExtractorGDBRemote &packet) { // Ensure we have a process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s failed, no process available", - __FUNCTION__); + LLDB_LOG(log, "failed, no process available"); return SendErrorResponse(0x15); } @@ -2693,28 +2607,22 @@ GDBRemoteCommunicationServerLLGS::Handle_Z(StringExtractorGDBRemote &packet) { if (want_breakpoint) { // Try to set the breakpoint. const Status error = - m_debugged_process_sp->SetBreakpoint(addr, size, want_hardware); + m_debugged_process_up->SetBreakpoint(addr, size, want_hardware); if (error.Success()) return SendOKResponse(); Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to set breakpoint: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to set breakpoint: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x09); } else { // Try to set the watchpoint. 
- const Status error = m_debugged_process_sp->SetWatchpoint( + const Status error = m_debugged_process_up->SetWatchpoint( addr, size, watch_flags, want_hardware); if (error.Success()) return SendOKResponse(); Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_WATCHPOINTS)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to set watchpoint: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to set watchpoint: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x09); } } @@ -2722,13 +2630,10 @@ GDBRemoteCommunicationServerLLGS::Handle_Z(StringExtractorGDBRemote &packet) { GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_z(StringExtractorGDBRemote &packet) { // Ensure we have a process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s failed, no process available", - __FUNCTION__); + LLDB_LOG(log, "failed, no process available"); return SendErrorResponse(0x15); } @@ -2790,27 +2695,21 @@ GDBRemoteCommunicationServerLLGS::Handle_z(StringExtractorGDBRemote &packet) { if (want_breakpoint) { // Try to clear the breakpoint. 
const Status error = - m_debugged_process_sp->RemoveBreakpoint(addr, want_hardware); + m_debugged_process_up->RemoveBreakpoint(addr, want_hardware); if (error.Success()) return SendOKResponse(); Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to remove breakpoint: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to remove breakpoint: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x09); } else { // Try to clear the watchpoint. - const Status error = m_debugged_process_sp->RemoveWatchpoint(addr); + const Status error = m_debugged_process_up->RemoveWatchpoint(addr); if (error.Success()) return SendOKResponse(); Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_WATCHPOINTS)); - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to remove watchpoint: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to remove watchpoint: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x09); } } @@ -2820,8 +2719,8 @@ GDBRemoteCommunicationServerLLGS::Handle_s(StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD)); // Ensure we have a process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2840,7 +2739,7 @@ GDBRemoteCommunicationServerLLGS::Handle_s(StringExtractorGDBRemote &packet) { // Double check that we have such a thread. // TODO investigate: on MacOSX we might need to do an UpdateThreads () here. 
- NativeThreadProtocolSP thread_sp = m_debugged_process_sp->GetThreadByID(tid); + NativeThreadProtocolSP thread_sp = m_debugged_process_up->GetThreadByID(tid); if (!thread_sp || thread_sp->GetID() != tid) return SendErrorResponse(0x33); @@ -2853,12 +2752,12 @@ GDBRemoteCommunicationServerLLGS::Handle_s(StringExtractorGDBRemote &packet) { // All other threads stop while we're single stepping a thread. actions.SetDefaultThreadActionIfNeeded(eStateStopped, 0); - Status error = m_debugged_process_sp->Resume(actions); + Status error = m_debugged_process_up->Resume(actions); if (error.Fail()) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " tid %" PRIu64 " Resume() failed with error: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), tid, + __FUNCTION__, m_debugged_process_up->GetID(), tid, error.AsCString()); return SendErrorResponse(0x49); } @@ -2901,8 +2800,8 @@ GDBRemoteCommunicationServerLLGS::Handle_qXfer_auxv_read( // Grab the auxv data if we need it. if (!m_active_auxv_buffer_up) { // Make sure we have a valid process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -2911,7 +2810,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qXfer_auxv_read( } // Grab the auxv data. - auto buffer_or_error = m_debugged_process_sp->GetAuxvData(); + auto buffer_or_error = m_debugged_process_up->GetAuxvData(); if (!buffer_or_error) { std::error_code ec = buffer_or_error.getError(); LLDB_LOG(log, "no auxv data retrieved: {0}", ec.message()); @@ -2979,11 +2878,10 @@ GDBRemoteCommunicationServerLLGS::Handle_QSaveRegisterState( // Grab the register context for the thread. 
NativeRegisterContextSP reg_context_sp(thread_sp->GetRegisterContext()); if (!reg_context_sp) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " tid %" PRIu64 - " failed, no register context available for the thread", - __FUNCTION__, m_debugged_process_sp->GetID(), thread_sp->GetID()); + LLDB_LOG( + log, + "pid {0} tid {1} failed, no register context available for the thread", + m_debugged_process_up->GetID(), thread_sp->GetID()); return SendErrorResponse(0x15); } @@ -2991,11 +2889,8 @@ GDBRemoteCommunicationServerLLGS::Handle_QSaveRegisterState( DataBufferSP register_data_sp; Status error = reg_context_sp->ReadAllRegisterValues(register_data_sp); if (error.Fail()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to save all register values: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to save all register values: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x75); } @@ -3029,10 +2924,8 @@ GDBRemoteCommunicationServerLLGS::Handle_QRestoreRegisterState( const uint32_t save_id = packet.GetU32(0); if (save_id == 0) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s QRestoreRegisterState " - "packet has malformed save id, expecting decimal uint32_t", - __FUNCTION__); + LLDB_LOG(log, "QRestoreRegisterState packet has malformed save id, " + "expecting decimal uint32_t"); return SendErrorResponse(0x76); } @@ -3050,11 +2943,10 @@ GDBRemoteCommunicationServerLLGS::Handle_QRestoreRegisterState( // Grab the register context for the thread. 
NativeRegisterContextSP reg_context_sp(thread_sp->GetRegisterContext()); if (!reg_context_sp) { - if (log) - log->Printf( - "GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 " tid %" PRIu64 - " failed, no register context available for the thread", - __FUNCTION__, m_debugged_process_sp->GetID(), thread_sp->GetID()); + LLDB_LOG( + log, + "pid {0} tid {1} failed, no register context available for the thread", + m_debugged_process_up->GetID(), thread_sp->GetID()); return SendErrorResponse(0x15); } @@ -3066,10 +2958,9 @@ GDBRemoteCommunicationServerLLGS::Handle_QRestoreRegisterState( // Find the register set buffer for the given save id. auto it = m_saved_registers_map.find(save_id); if (it == m_saved_registers_map.end()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " does not have a register set save buffer for id %" PRIu32, - __FUNCTION__, m_debugged_process_sp->GetID(), save_id); + LLDB_LOG(log, + "pid {0} does not have a register set save buffer for id {1}", + m_debugged_process_up->GetID(), save_id); return SendErrorResponse(0x77); } register_data_sp = it->second; @@ -3080,11 +2971,8 @@ GDBRemoteCommunicationServerLLGS::Handle_QRestoreRegisterState( Status error = reg_context_sp->WriteAllRegisterValues(register_data_sp); if (error.Fail()) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s pid %" PRIu64 - " failed to restore all register values: %s", - __FUNCTION__, m_debugged_process_sp->GetID(), - error.AsCString()); + LLDB_LOG(log, "pid {0} failed to restore all register values: {1}", + m_debugged_process_up->GetID(), error); return SendErrorResponse(0x77); } @@ -3124,7 +3012,7 @@ GDBRemoteCommunicationServerLLGS::Handle_vAttach( } // Notify we attached by sending a stop packet. 
- return SendStopReasonForState(m_debugged_process_sp->GetState()); + return SendStopReasonForState(m_debugged_process_up->GetState()); } GDBRemoteCommunication::PacketResult @@ -3134,8 +3022,8 @@ GDBRemoteCommunicationServerLLGS::Handle_D(StringExtractorGDBRemote &packet) { StopSTDIOForwarding(); // Fail if we don't have a current process. - if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) { + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) { if (log) log->Printf( "GDBRemoteCommunicationServerLLGS::%s failed, no process available", @@ -3157,16 +3045,16 @@ GDBRemoteCommunicationServerLLGS::Handle_D(StringExtractorGDBRemote &packet) { return SendIllFormedResponse(packet, "D failed to parse the process id"); } - if (pid != LLDB_INVALID_PROCESS_ID && m_debugged_process_sp->GetID() != pid) { + if (pid != LLDB_INVALID_PROCESS_ID && m_debugged_process_up->GetID() != pid) { return SendIllFormedResponse(packet, "Invalid pid"); } - const Status error = m_debugged_process_sp->Detach(); + const Status error = m_debugged_process_up->Detach(); if (error.Fail()) { if (log) log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to detach from " "pid %" PRIu64 ": %s\n", - __FUNCTION__, m_debugged_process_sp->GetID(), + __FUNCTION__, m_debugged_process_up->GetID(), error.AsCString()); return SendErrorResponse(0x01); } @@ -3197,24 +3085,18 @@ GDBRemoteCommunicationServerLLGS::Handle_jThreadsInfo( Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD)); // Ensure we have a debugged process. 
- if (!m_debugged_process_sp || - (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + if (!m_debugged_process_up || + (m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID)) return SendErrorResponse(50); - - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s preparing packet for pid " - "%" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); + LLDB_LOG(log, "preparing packet for pid {0}", m_debugged_process_up->GetID()); StreamString response; const bool threads_with_valid_stop_info_only = false; JSONArray::SP threads_array_sp = GetJSONThreadsInfo( - *m_debugged_process_sp, threads_with_valid_stop_info_only); + *m_debugged_process_up, threads_with_valid_stop_info_only); if (!threads_array_sp) { - if (log) - log->Printf("GDBRemoteCommunicationServerLLGS::%s failed to prepare a " - "packet for pid %" PRIu64, - __FUNCTION__, m_debugged_process_sp->GetID()); + LLDB_LOG(log, "failed to prepare a packet for pid {0}", + m_debugged_process_up->GetID()); return SendErrorResponse(52); } @@ -3228,8 +3110,8 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qWatchpointSupportInfo( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. 
- if (!m_debugged_process_sp || - m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID) + if (!m_debugged_process_up || + m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID) return SendErrorResponse(68); packet.SetFilePos(strlen("qWatchpointSupportInfo")); @@ -3238,7 +3120,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qWatchpointSupportInfo( if (packet.GetChar() != ':') return SendErrorResponse(67); - auto hw_debug_cap = m_debugged_process_sp->GetHardwareDebugSupportInfo(); + auto hw_debug_cap = m_debugged_process_up->GetHardwareDebugSupportInfo(); StreamGDBRemote response; if (hw_debug_cap == llvm::None) @@ -3253,8 +3135,8 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qFileLoadAddress( StringExtractorGDBRemote &packet) { // Fail if we don't have a current process. - if (!m_debugged_process_sp || - m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID) + if (!m_debugged_process_up || + m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID) return SendErrorResponse(67); packet.SetFilePos(strlen("qFileLoadAddress:")); @@ -3266,7 +3148,7 @@ GDBRemoteCommunicationServerLLGS::Handle_qFileLoadAddress( lldb::addr_t file_load_address = LLDB_INVALID_ADDRESS; Status error = - m_debugged_process_sp->GetFileLoadAddress(file_name, file_load_address); + m_debugged_process_up->GetFileLoadAddress(file_name, file_load_address); if (error.Fail()) return SendErrorResponse(69); @@ -3302,10 +3184,10 @@ GDBRemoteCommunicationServerLLGS::Handle_QPassSignals( } // Fail if we don't have a current process. 
- if (!m_debugged_process_sp) + if (!m_debugged_process_up) return SendErrorResponse(68); - Status error = m_debugged_process_sp->IgnoreSignals(signals); + Status error = m_debugged_process_up->IgnoreSignals(signals); if (error.Fail()) return SendErrorResponse(69); @@ -3342,8 +3224,8 @@ NativeThreadProtocolSP GDBRemoteCommunicationServerLLGS::GetThreadFromSuffix( NativeThreadProtocolSP thread_sp; // We have no thread if we don't have a process. - if (!m_debugged_process_sp || - m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID) + if (!m_debugged_process_up || + m_debugged_process_up->GetID() == LLDB_INVALID_PROCESS_ID) return thread_sp; // If the client hasn't asked for thread suffix support, there will not be a @@ -3355,9 +3237,9 @@ NativeThreadProtocolSP GDBRemoteCommunicationServerLLGS::GetThreadFromSuffix( return thread_sp; else if (current_tid == 0) { // Pick a thread. - return m_debugged_process_sp->GetThreadAtIndex(0); + return m_debugged_process_up->GetThreadAtIndex(0); } else - return m_debugged_process_sp->GetThreadByID(current_tid); + return m_debugged_process_up->GetThreadByID(current_tid); } Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); @@ -3387,7 +3269,7 @@ NativeThreadProtocolSP GDBRemoteCommunicationServerLLGS::GetThreadFromSuffix( packet.SetFilePos(packet.GetFilePos() + strlen("thread:")); const lldb::tid_t tid = packet.GetHexMaxU64(false, 0); if (tid != 0) - return m_debugged_process_sp->GetThreadByID(tid); + return m_debugged_process_up->GetThreadByID(tid); return thread_sp; } @@ -3397,9 +3279,9 @@ lldb::tid_t GDBRemoteCommunicationServerLLGS::GetCurrentThreadID() const { // Use whatever the debug process says is the current thread id // since the protocol either didn't specify or specified we want // any/all threads marked as the current thread. 
- if (!m_debugged_process_sp) + if (!m_debugged_process_up) return LLDB_INVALID_THREAD_ID; - return m_debugged_process_sp->GetCurrentThreadID(); + return m_debugged_process_up->GetCurrentThreadID(); } // Use the specific current thread id set by the gdb remote protocol. return m_current_tid; @@ -3420,9 +3302,9 @@ void GDBRemoteCommunicationServerLLGS::ClearProcessSpecificData() { FileSpec GDBRemoteCommunicationServerLLGS::FindModuleFile(const std::string &module_path, const ArchSpec &arch) { - if (m_debugged_process_sp) { + if (m_debugged_process_up) { FileSpec file_spec; - if (m_debugged_process_sp + if (m_debugged_process_up ->GetLoadedModuleFileSpec(module_path.c_str(), file_spec) .Success()) { if (file_spec.Exists()) diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h index b065642d4aed..71199473bb8e 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h @@ -114,7 +114,7 @@ class GDBRemoteCommunicationServerLLGS lldb::tid_t m_current_tid = LLDB_INVALID_THREAD_ID; lldb::tid_t m_continue_tid = LLDB_INVALID_THREAD_ID; std::recursive_mutex m_debugged_process_mutex; - NativeProcessProtocolSP m_debugged_process_sp; + std::unique_ptr m_debugged_process_up; Communication m_stdio_communication; MainLoop::ReadHandleUP m_stdio_handle_up; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 8b77a282bd73..e46bbeb18ba0 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -16,6 +16,7 @@ #include #include // for mmap #include +#include 
#endif #include #include @@ -40,6 +41,7 @@ #include "lldb/Host/ConnectionFileDescriptor.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostThread.h" +#include "lldb/Host/PosixApi.h" #include "lldb/Host/PseudoTerminal.h" #include "lldb/Host/StringConvert.h" #include "lldb/Host/Symbols.h" diff --git a/contrib/llvm/tools/lldb/source/Target/Target.cpp b/contrib/llvm/tools/lldb/source/Target/Target.cpp index 4632ada26ed3..d97f651ca08b 100644 --- a/contrib/llvm/tools/lldb/source/Target/Target.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Target.cpp @@ -34,6 +34,7 @@ #include "lldb/Expression/REPL.h" #include "lldb/Expression/UserExpression.h" #include "lldb/Host/Host.h" +#include "lldb/Host/PosixApi.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionGroupWatchpoint.h" diff --git a/contrib/llvm/tools/lldb/tools/driver/Driver.cpp b/contrib/llvm/tools/lldb/tools/driver/Driver.cpp index 8e226a97af35..102ba775da91 100644 --- a/contrib/llvm/tools/lldb/tools/driver/Driver.cpp +++ b/contrib/llvm/tools/lldb/tools/driver/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" +#include #include #include #include diff --git a/contrib/llvm/tools/lldb/tools/lldb-mi/MICmnLLDBDebugger.cpp b/contrib/llvm/tools/lldb/tools/lldb-mi/MICmnLLDBDebugger.cpp index a676ecc92602..b102cab9f93a 100644 --- a/contrib/llvm/tools/lldb/tools/lldb-mi/MICmnLLDBDebugger.cpp +++ b/contrib/llvm/tools/lldb/tools/lldb-mi/MICmnLLDBDebugger.cpp @@ -17,6 +17,7 @@ #include "lldb/API/SBTypeCategory.h" #include "lldb/API/SBTypeNameSpecifier.h" #include "lldb/API/SBTypeSummary.h" +#include // In-house headers: #include "MICmnLLDBDebugSessionInfo.h" diff --git a/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriver.cpp b/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriver.cpp index 49e8588bf732..7bd6b7b2e166 100644 --- a/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriver.cpp +++ b/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriver.cpp @@ -9,6 +9,8 @@ 
// Third party headers: #include "lldb/API/SBError.h" +#include +#include #include // In-house headers: diff --git a/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriverMain.cpp b/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriverMain.cpp index fdced8dd4ea2..be01f1d97790 100644 --- a/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriverMain.cpp +++ b/contrib/llvm/tools/lldb/tools/lldb-mi/MIDriverMain.cpp @@ -33,6 +33,7 @@ // Third party headers: #include "lldb/API/SBHostOS.h" +#include #include // In house headers: diff --git a/contrib/llvm/tools/lldb/tools/lldb-server/lldb-gdbserver.cpp b/contrib/llvm/tools/lldb/tools/lldb-server/lldb-gdbserver.cpp index 337f244c2c2d..f1a9b113c8ee 100644 --- a/contrib/llvm/tools/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/contrib/llvm/tools/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -67,13 +67,13 @@ typedef process_netbsd::NativeProcessNetBSD::Factory NativeProcessFactory; // Dummy implementation to make sure the code compiles class NativeProcessFactory : public NativeProcessProtocol::Factory { public: - llvm::Expected + llvm::Expected> Launch(ProcessLaunchInfo &launch_info, NativeProcessProtocol::NativeDelegate &delegate, MainLoop &mainloop) const override { llvm_unreachable("Not implemented"); } - llvm::Expected + llvm::Expected> Attach(lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &delegate, MainLoop &mainloop) const override { llvm_unreachable("Not implemented"); diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp index 500507fd4966..af4d3efa52f7 100644 --- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h" #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" @@ -863,6 +864,9 @@ int main(int argc, char **argv) { 
llvm::InitializeAllAsmParsers(); StringRef Stem = sys::path::stem(ToolName); + if (Stem.find("dlltool") != StringRef::npos) + return dlltoolDriverMain(makeArrayRef(argv, argc)); + if (Stem.find("ranlib") == StringRef::npos && Stem.find("lib") != StringRef::npos) return libDriverMain(makeArrayRef(argv, argc)); @@ -878,5 +882,5 @@ int main(int argc, char **argv) { return ranlib_main(); if (Stem.find("ar") != StringRef::npos) return ar_main(); - fail("Not ranlib, ar or lib!"); + fail("Not ranlib, ar, lib or dlltool!"); } diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp index 812f1af3ac68..d54b45515f05 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -870,7 +870,10 @@ static void printRelocationTargetName(const MachOObjectFile *O, bool isExtern = O->getPlainRelocationExternal(RE); uint64_t Val = O->getPlainRelocationSymbolNum(RE); - if (isExtern) { + if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) { + fmt << format("0x%x", Val); + return; + } else if (isExtern) { symbol_iterator SI = O->symbol_begin(); advance(SI, Val); Expected SOrErr = SI->getName(); diff --git a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp index 0642d841fd9f..01c7481c3086 100644 --- a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -654,7 +654,7 @@ static void dumpFullTypeStream(LinePrinter &Printer, NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords()); MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, - Stream.getHashValues()); + Stream.getNumHashBuckets(), Stream.getHashValues()); if (auto EC = codeview::visitTypeStream(Types, V)) { Printer.formatLine("An error occurred dumping type records: {0}", @@ -670,7 +670,7 @@ static void dumpPartialTypeStream(LinePrinter &Printer, 
NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords()); MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, - Stream.getHashValues()); + Stream.getNumHashBuckets(), Stream.getHashValues()); if (opts::dump::DumpTypeDependents) { // If we need to dump all dependents, then iterate each index and find diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp index ab7045ca4492..d93843649db0 100644 --- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp @@ -725,8 +725,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { Proc.Parent, Proc.End, formatSegmentOffset(Proc.Segment, Proc.CodeOffset), Proc.CodeSize); - P.formatLine("debug start = {0}, debug end = {1}, flags = {2}", Proc.DbgStart, - Proc.DbgEnd, + // FIXME: It seems FunctionType is sometimes an id and sometimes a type. + P.formatLine("type = `{0}`, debug start = {1}, debug end = {2}, flags = {3}", + typeIndex(Proc.FunctionType), Proc.DbgStart, Proc.DbgEnd, formatProcSymFlags(P.getIndentLevel() + 9, Proc.Flags)); return Error::success(); } diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp index 9621320ea99a..0079b9e7eaa4 100644 --- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/CodeView/Formatters.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" @@ -214,10 +215,20 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { getLeafTypeName(Record.Type), Record.length()); } else { std::string H; - if (Index.toArrayIndex() >= 
HashValues.size()) + if (Index.toArrayIndex() >= HashValues.size()) { H = "(not present)"; - else - H = utostr(HashValues[Index.toArrayIndex()]); + } else { + uint32_t Hash = HashValues[Index.toArrayIndex()]; + Expected MaybeHash = hashTypeRecord(Record); + if (!MaybeHash) + return MaybeHash.takeError(); + uint32_t OurHash = *MaybeHash; + OurHash %= NumHashBuckets; + if (Hash == OurHash) + H = "0x" + utohexstr(Hash); + else + H = "0x" + utohexstr(Hash) + ", our hash = 0x" + utohexstr(OurHash); + } P.formatLine("{0} | {1} [size = {2}, hash = {3}]", fmt_align(Index, AlignStyle::Right, Width), getLeafTypeName(Record.Type), Record.length(), H); @@ -395,8 +406,7 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR, Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { - P.formatLine("name = {0}, age = {1}, guid = {2}", TS.Name, TS.Age, - fmt_guid(TS.Guid)); + P.formatLine("name = {0}, age = {1}, guid = {2}", TS.Name, TS.Age, TS.Guid); return Error::success(); } diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h index 42882b4b4060..4227688f0f71 100644 --- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h +++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h @@ -25,9 +25,10 @@ class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks { public: MinimalTypeDumpVisitor(LinePrinter &P, uint32_t Width, bool RecordBytes, bool Hashes, codeview::LazyRandomTypeCollection &Types, + uint32_t NumHashBuckets, FixedStreamArray HashValues) : P(P), Width(Width), RecordBytes(RecordBytes), Hashes(Hashes), - Types(Types), HashValues(HashValues) {} + Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues) {} Error visitTypeBegin(codeview::CVType &Record, codeview::TypeIndex Index) override; @@ -53,6 +54,7 @@ class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks { bool RecordBytes = false; bool Hashes = false; 
codeview::LazyRandomTypeCollection &Types; + uint32_t NumHashBuckets; FixedStreamArray HashValues; }; } // namespace pdb diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp index 315ae2e6711f..9c3beb566d2c 100644 --- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp @@ -38,41 +38,6 @@ LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::pdb::PdbRaw_FeatureSig) namespace llvm { namespace yaml { -template <> struct ScalarTraits { - static void output(const llvm::pdb::PDB_UniqueId &S, void *, - llvm::raw_ostream &OS) { - OS << S; - } - - static StringRef input(StringRef Scalar, void *Ctx, - llvm::pdb::PDB_UniqueId &S) { - if (Scalar.size() != 38) - return "GUID strings are 38 characters long"; - if (Scalar[0] != '{' || Scalar[37] != '}') - return "GUID is not enclosed in {}"; - if (Scalar[9] != '-' || Scalar[14] != '-' || Scalar[19] != '-' || - Scalar[24] != '-') - return "GUID sections are not properly delineated with dashes"; - - uint8_t *OutBuffer = S.Guid; - for (auto Iter = Scalar.begin(); Iter != Scalar.end();) { - if (*Iter == '-' || *Iter == '{' || *Iter == '}') { - ++Iter; - continue; - } - uint8_t Value = (llvm::hexDigitValue(*Iter) << 4); - ++Iter; - Value |= llvm::hexDigitValue(*Iter); - ++Iter; - *OutBuffer++ = Value; - } - - return ""; - } - - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } -}; - template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, llvm::pdb::PDB_Machine &Value) { io.enumCase(Value, "Invalid", PDB_Machine::Invalid); diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h index 62ed608916fc..91e054490a5f 100644 --- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h +++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h @@ -57,7 +57,7 @@ struct PdbInfoStream { PdbRaw_ImplVer Version = PdbImplVC70; uint32_t Signature = 0; uint32_t Age = 1; - PDB_UniqueId Guid; + codeview::GUID Guid; 
std::vector Features; std::vector NamedStreams; }; diff --git a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index 6aa08ff3cd87..f2bd194622ed 100644 --- a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -956,8 +956,8 @@ static void mergePdbs() { SmallVector IdMap; if (File.hasPDBTpiStream()) { auto &Tpi = ExitOnErr(File.getPDBTpiStream()); - ExitOnErr(codeview::mergeTypeRecords(MergedTpi, TypeMap, nullptr, - Tpi.typeArray())); + ExitOnErr( + codeview::mergeTypeRecords(MergedTpi, TypeMap, Tpi.typeArray())); } if (File.hasPDBIpiStream()) { auto &Ipi = ExitOnErr(File.getPDBIpiStream()); diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp index 9fb3267e2f9d..74c44116b127 100644 --- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -1215,8 +1215,7 @@ void COFFDumper::mergeCodeViewTypes(TypeTableBuilder &CVIDs, error(object_error::parse_failed); } SmallVector SourceToDest; - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, nullptr, - Types)) + if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types)) return error(std::move(EC)); } } diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp index a1db96cba081..5698420bbcc2 100644 --- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1532,6 +1532,7 @@ static const char *getTypeString(unsigned Arch, uint64_t Type) { LLVM_READOBJ_TYPE_CASE(TLSDESC_PLT); LLVM_READOBJ_TYPE_CASE(TLSDESC_GOT); LLVM_READOBJ_TYPE_CASE(AUXILIARY); + LLVM_READOBJ_TYPE_CASE(FILTER); default: return "unknown"; } } @@ -1624,6 +1625,10 @@ StringRef ELFDumper::getDynamicString(uint64_t Value) const { return StringRef(DynamicStringTable.data() + Value); } +static void printLibrary(raw_ostream &OS, 
const Twine &Tag, const Twine &Name) { + OS << Tag << ": [" << Name << "]"; +} + template void ELFDumper::printValue(uint64_t Type, uint64_t Value) { raw_ostream &OS = W.getOStream(); @@ -1687,13 +1692,16 @@ void ELFDumper::printValue(uint64_t Type, uint64_t Value) { OS << Value << " (bytes)"; break; case DT_NEEDED: - OS << "SharedLibrary (" << getDynamicString(Value) << ")"; + printLibrary(OS, "Shared library", getDynamicString(Value)); break; case DT_SONAME: - OS << "LibrarySoname (" << getDynamicString(Value) << ")"; + printLibrary(OS, "Library soname", getDynamicString(Value)); break; case DT_AUXILIARY: - OS << "Auxiliary library: [" << getDynamicString(Value) << "]"; + printLibrary(OS, "Auxiliary library", getDynamicString(Value)); + break; + case DT_FILTER: + printLibrary(OS, "Filter library", getDynamicString(Value)); break; case DT_RPATH: case DT_RUNPATH: diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp index 51991a3f067b..7bfb18fab12b 100644 --- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/Signals.h" @@ -50,6 +51,13 @@ namespace opts { cl::desc(""), cl::ZeroOrMore); + // -wide, -W + cl::opt WideOutput("wide", + cl::desc("Ignored for compatibility with GNU readelf")); + cl::alias WideOutputShort("W", + cl::desc("Alias for --wide"), + cl::aliasopt(WideOutput)); + // -file-headers, -h cl::opt FileHeaders("file-headers", cl::desc("Display file headers ")); @@ -57,12 +65,16 @@ namespace opts { cl::desc("Alias for --file-headers"), cl::aliasopt(FileHeaders)); - // -sections, -s + // -sections, -s, -S + // Note: In GNU readelf, -s means --symbols! 
cl::opt Sections("sections", cl::desc("Display all sections.")); cl::alias SectionsShort("s", cl::desc("Alias for --sections"), cl::aliasopt(Sections)); + cl::alias SectionsShortUpper("S", + cl::desc("Alias for --sections"), + cl::aliasopt(Sections)); // -section-relocations, -sr cl::opt SectionRelocations("section-relocations", @@ -533,13 +545,19 @@ static void dumpInput(StringRef File) { } int main(int argc, const char *argv[]) { - sys::PrintStackTraceOnErrorSignal(argv[0]); + StringRef ToolName = argv[0]; + sys::PrintStackTraceOnErrorSignal(ToolName); PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Register the target printer for --version. cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); + opts::WideOutput.setHiddenFlag(cl::Hidden); + + if (sys::path::stem(ToolName).find("readelf") != StringRef::npos) + opts::Output = opts::GNU; + cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n"); // Default to stdin if no filename is specified. 
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp index d4a21a986c58..6399fb5ec1dd 100644 --- a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1268,12 +1268,12 @@ void CodeGenRegBank::computeSubRegLaneMasks() { CoveringLanes = LaneBitmask::getAll(); for (auto &Idx : SubRegIndices) { if (Idx.getComposites().empty()) { - if (Bit > 32) { + if (Bit > LaneBitmask::BitWidth) { PrintFatalError( Twine("Ran out of lanemask bits to represent subregister ") + Idx.getName()); } - Idx.LaneMask = LaneBitmask(1 << Bit); + Idx.LaneMask = LaneBitmask::getLane(Bit); ++Bit; } else { Idx.LaneMask = LaneBitmask::getNone(); @@ -1298,9 +1298,9 @@ void CodeGenRegBank::computeSubRegLaneMasks() { static_assert(sizeof(Idx.LaneMask.getAsInteger()) == 4, "Change Log2_32 to a proper one"); unsigned DstBit = Log2_32(Idx.LaneMask.getAsInteger()); - assert(Idx.LaneMask == LaneBitmask(1 << DstBit) && + assert(Idx.LaneMask == LaneBitmask::getLane(DstBit) && "Must be a leaf subregister"); - MaskRolPair MaskRol = { LaneBitmask(1), (uint8_t)DstBit }; + MaskRolPair MaskRol = { LaneBitmask::getLane(0), (uint8_t)DstBit }; LaneTransforms.push_back(MaskRol); } else { // Go through all leaf subregisters and find the ones that compose with @@ -1314,7 +1314,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() { continue; // Replicate the behaviour from the lane mask generation loop above. unsigned SrcBit = NextBit; - LaneBitmask SrcMask = LaneBitmask(1 << SrcBit); + LaneBitmask SrcMask = LaneBitmask::getLane(SrcBit); if (NextBit < LaneBitmask::BitWidth-1) ++NextBit; assert(Idx2.LaneMask == SrcMask); @@ -1386,7 +1386,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() { // For classes without any subregisters set LaneMask to 1 instead of 0. // This makes it easier for client code to handle classes uniformly. 
if (LaneMask.none()) - LaneMask = LaneBitmask(1); + LaneMask = LaneBitmask::getLane(0); RegClass.LaneMask = LaneMask; } diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc index 67eef20f8640..67b65b4ff095 100644 --- a/lib/clang/include/clang/Basic/Version.inc +++ b/lib/clang/include/clang/Basic/Version.inc @@ -8,4 +8,4 @@ #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "307894" +#define SVN_REVISION "308421" diff --git a/lib/clang/include/lld/Config/Version.inc b/lib/clang/include/lld/Config/Version.inc index b8450b7c0456..ab0b0d55a5c9 100644 --- a/lib/clang/include/lld/Config/Version.inc +++ b/lib/clang/include/lld/Config/Version.inc @@ -4,5 +4,5 @@ #define LLD_VERSION_STRING "5.0.0" #define LLD_VERSION_MAJOR 5 #define LLD_VERSION_MINOR 0 -#define LLD_REVISION_STRING "307894" +#define LLD_REVISION_STRING "308421" #define LLD_REPOSITORY_STRING "FreeBSD" diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index b40ecf0c5c10..d69caa193f34 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,2 +1,2 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "svn-r307894" +#define LLVM_REVISION "svn-r308421" diff --git a/lib/clang/libllvm/Makefile b/lib/clang/libllvm/Makefile index c3a0e52737b8..e02af194ac60 100644 --- a/lib/clang/libllvm/Makefile +++ b/lib/clang/libllvm/Makefile @@ -617,6 +617,8 @@ SRCS_MIN+= MC/WinCOFFObjectWriter.cpp SRCS_MIN+= Object/Archive.cpp SRCS_MIN+= Object/ArchiveWriter.cpp SRCS_MIN+= Object/Binary.cpp +SRCS_EXT+= Object/COFFImportFile.cpp +SRCS_EXT+= Object/COFFModuleDefinition.cpp SRCS_MIN+= Object/COFFObjectFile.cpp SRCS_MIN+= Object/Decompressor.cpp SRCS_MIN+= Object/ELF.cpp @@ -779,6 +781,7 @@ SRCS_MIN+= Target/AArch64/AArch64ConditionOptimizer.cpp SRCS_MIN+= Target/AArch64/AArch64ConditionalCompares.cpp SRCS_MIN+= Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp SRCS_MIN+= 
Target/AArch64/AArch64ExpandPseudoInsts.cpp +SRCS_MIN+= Target/AArch64/AArch64FalkorHWPFFix.cpp SRCS_MIN+= Target/AArch64/AArch64FastISel.cpp SRCS_MIN+= Target/AArch64/AArch64FrameLowering.cpp SRCS_MIN+= Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1011,6 +1014,7 @@ SRCS_MIN+= Target/X86/X86AsmPrinter.cpp SRCS_MIN+= Target/X86/X86CallFrameOptimization.cpp SRCS_MIN+= Target/X86/X86CallLowering.cpp SRCS_MIN+= Target/X86/X86CallingConv.cpp +SRCS_MIN+= Target/X86/X86CmovConversion.cpp SRCS_MIN+= Target/X86/X86EvexToVex.cpp SRCS_MIN+= Target/X86/X86ExpandPseudo.cpp SRCS_MIN+= Target/X86/X86FastISel.cpp @@ -1042,6 +1046,7 @@ SRCS_MIN+= Target/X86/X86TargetTransformInfo.cpp SRCS_MIN+= Target/X86/X86VZeroUpper.cpp SRCS_MIN+= Target/X86/X86WinAllocaExpander.cpp SRCS_MIN+= Target/X86/X86WinEHState.cpp +SRCS_EXT+= ToolDrivers/llvm-dlltool/DlltoolDriver.cpp SRCS_EXL+= ToolDrivers/llvm-lib/LibDriver.cpp SRCS_MIN+= Transforms/Coroutines/CoroCleanup.cpp SRCS_MIN+= Transforms/Coroutines/CoroEarly.cpp @@ -1283,11 +1288,19 @@ AttributesCompatFunc.inc: ${LLVM_SRCS}/lib/IR/AttributesCompatFunc.td ${LLVM_SRCS}/lib/IR/AttributesCompatFunc.td TGHDRS+= AttributesCompatFunc.inc -Options.inc: ${LLVM_SRCS}/lib/ToolDrivers/llvm-lib/Options.td +llvm-lib/Options.inc: ${LLVM_SRCS}/lib/ToolDrivers/llvm-lib/Options.td ${LLVM_TBLGEN} -gen-opt-parser-defs \ -I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \ ${LLVM_SRCS}/lib/ToolDrivers/llvm-lib/Options.td -TGHDRS+= Options.inc +TGHDRS+= llvm-lib/Options.inc +CFLAGS.LibDriver.cpp+= -I${.OBJDIR}/llvm-lib + +llvm-dlltool/Options.inc: ${LLVM_SRCS}/lib/ToolDrivers/llvm-dlltool/Options.td + ${LLVM_TBLGEN} -gen-opt-parser-defs \ + -I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \ + ${LLVM_SRCS}/lib/ToolDrivers/llvm-dlltool/Options.td +TGHDRS+= llvm-dlltool/Options.inc +CFLAGS.DlltoolDriver.cpp+= -I${.OBJDIR}/llvm-dlltool # Note: some rules are superfluous, not every combination is valid. .for arch in \