diff --git a/contrib/compiler-rt/include/sanitizer/tsan_interface.h b/contrib/compiler-rt/include/sanitizer/tsan_interface.h index 34b74d537e0c..45e54f7581a1 100644 --- a/contrib/compiler-rt/include/sanitizer/tsan_interface.h +++ b/contrib/compiler-rt/include/sanitizer/tsan_interface.h @@ -114,6 +114,21 @@ void __tsan_mutex_post_signal(void *addr, unsigned flags); void __tsan_mutex_pre_divert(void *addr, unsigned flags); void __tsan_mutex_post_divert(void *addr, unsigned flags); +// External race detection API. +// Can be used by non-instrumented libraries to detect when their objects are +// being used in an unsafe manner. +// - __tsan_external_read/__tsan_external_write annotates the logical reads +// and writes of the object at the specified address. 'caller_pc' should +// be the PC of the library user, which the library can obtain with e.g. +// `__builtin_return_address(0)`. +// - __tsan_external_register_tag registers a 'tag' with the specified name, +// which is later used in read/write annotations to denote the object type +// - __tsan_external_assign_tag can optionally mark a heap object with a tag +void *__tsan_external_register_tag(const char *object_type); +void __tsan_external_assign_tag(void *addr, void *tag); +void __tsan_external_read(void *addr, void *caller_pc, void *tag); +void __tsan_external_write(void *addr, void *caller_pc, void *tag); + #ifdef __cplusplus } // extern "C" #endif diff --git a/contrib/compiler-rt/lib/builtins/emutls.c b/contrib/compiler-rt/lib/builtins/emutls.c index eccbf53366e3..e8d5ddb22011 100644 --- a/contrib/compiler-rt/lib/builtins/emutls.c +++ b/contrib/compiler-rt/lib/builtins/emutls.c @@ -7,7 +7,6 @@ * * ===----------------------------------------------------------------------=== */ -#include #include #include #include @@ -15,6 +14,23 @@ #include "int_lib.h" #include "int_util.h" +typedef struct emutls_address_array { + uintptr_t size; /* number of elements in the 'data' array */ + void* data[]; +} emutls_address_array; + +static void emutls_shutdown(emutls_address_array *array); + +#ifndef _WIN32 + +#include + +static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_key_t emutls_pthread_key; + +typedef unsigned int gcc_word __attribute__((mode(word))); +typedef unsigned int gcc_pointer __attribute__((mode(pointer))); + /* Default is not to use posix_memalign, so systems like Android * can use thread local data without heavier POSIX memory allocators. */ @@ -22,26 +38,6 @@ #define EMUTLS_USE_POSIX_MEMALIGN 0 #endif -/* For every TLS variable xyz, - * there is one __emutls_control variable named __emutls_v.xyz. - * If xyz has non-zero initial value, __emutls_v.xyz's "value" - * will point to __emutls_t.xyz, which has the initial value. - */ -typedef unsigned int gcc_word __attribute__((mode(word))); -typedef struct __emutls_control { - /* Must use gcc_word here, instead of size_t, to match GCC. When - gcc_word is larger than size_t, the upper extra bits are all - zeros. We can use variables of size_t to operate on size and - align. 
*/ - gcc_word size; /* size of the object in bytes */ - gcc_word align; /* alignment of the object in bytes */ - union { - uintptr_t index; /* data[index-1] is the object address */ - void* address; /* object address, when in single thread env */ - } object; - void* value; /* null or non-zero initial value for the object */ -} __emutls_control; - static __inline void *emutls_memalign_alloc(size_t align, size_t size) { void *base; #if EMUTLS_USE_POSIX_MEMALIGN @@ -50,7 +46,7 @@ static __inline void *emutls_memalign_alloc(size_t align, size_t size) { #else #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) char* object; - if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) abort(); base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & ~(uintptr_t)(align - 1)); @@ -69,10 +65,207 @@ static __inline void emutls_memalign_free(void *base) { #endif } +static void emutls_key_destructor(void* ptr) { + emutls_shutdown((emutls_address_array*)ptr); + free(ptr); +} + +static __inline void emutls_init(void) { + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); +} + +static __inline void emutls_init_once(void) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); +} + +static __inline void emutls_lock() { + pthread_mutex_lock(&emutls_mutex); +} + +static __inline void emutls_unlock() { + pthread_mutex_unlock(&emutls_mutex); +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + pthread_setspecific(emutls_pthread_key, (void*) value); +} + +static __inline emutls_address_array* emutls_getspecific() { + return (emutls_address_array*) pthread_getspecific(emutls_pthread_key); +} + +#else + +#include +#include +#include +#include +#include + +static LPCRITICAL_SECTION emutls_mutex; +static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; + +typedef uintptr_t gcc_word; +typedef void * gcc_pointer; + +static void win_error(DWORD last_err, const char *hint) { + char *buffer = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { + fprintf(stderr, "Windows error: %s\n", buffer); + } else { + fprintf(stderr, "Unkown Windows error: %s\n", hint); + } + LocalFree(buffer); +} + +static __inline void win_abort(DWORD last_err, const char *hint) { + win_error(last_err, hint); + abort(); +} + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base = _aligned_malloc(size, align); + if (!base) + win_abort(GetLastError(), "_aligned_malloc"); + return base; +} + +static __inline void emutls_memalign_free(void *base) { + _aligned_free(base); +} + +static void emutls_exit(void) { + if (emutls_mutex) { + DeleteCriticalSection(emutls_mutex); + _aligned_free(emutls_mutex); + emutls_mutex = NULL; + } + if (emutls_tls_index != TLS_OUT_OF_INDEXES) { + emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index)); + TlsFree(emutls_tls_index); + emutls_tls_index = TLS_OUT_OF_INDEXES; + } +} + +#pragma warning (push) +#pragma warning (disable : 4100) +static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { + emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); + if (!emutls_mutex) { + win_error(GetLastError(), "_aligned_malloc"); + return FALSE; + } + InitializeCriticalSection(emutls_mutex); + + emutls_tls_index = TlsAlloc(); + if (emutls_tls_index == 
TLS_OUT_OF_INDEXES) { + emutls_exit(); + win_error(GetLastError(), "TlsAlloc"); + return FALSE; + } + atexit(&emutls_exit); + return TRUE; +} + +static __inline void emutls_init_once(void) { + static INIT_ONCE once; + InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); +} + +static __inline void emutls_lock() { + EnterCriticalSection(emutls_mutex); +} + +static __inline void emutls_unlock() { + LeaveCriticalSection(emutls_mutex); +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0) + win_abort(GetLastError(), "TlsSetValue"); +} + +static __inline emutls_address_array* emutls_getspecific() { + LPVOID value = TlsGetValue(emutls_tls_index); + if (value == NULL) { + const DWORD err = GetLastError(); + if (err != ERROR_SUCCESS) + win_abort(err, "TlsGetValue"); + } + return (emutls_address_array*) value; +} + +/* Provide atomic load/store functions for emutls_get_index if built with MSVC. + */ +#if !defined(__ATOMIC_RELEASE) + +enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; + +static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { + assert(type == __ATOMIC_ACQUIRE); +#ifdef _WIN64 + return (uintptr_t) _load_be_u64(ptr); +#else + return (uintptr_t) _load_be_u32(ptr); +#endif +} + +static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { + assert(type == __ATOMIC_RELEASE); +#ifdef _WIN64 + _store_be_u64(ptr, val); +#else + _store_be_u32(ptr, val); +#endif +} + +#endif + +#pragma warning (pop) + +#endif + +static size_t emutls_num_object = 0; /* number of allocated TLS objects */ + +/* Free the allocated TLS data + */ +static void emutls_shutdown(emutls_address_array *array) { + if (array) { + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); + } + } +} + +/* For every TLS variable xyz, + * there is one __emutls_control variable named __emutls_v.xyz. + * If xyz has non-zero initial value, __emutls_v.xyz's "value" + * will point to __emutls_t.xyz, which has the initial value. + */ +typedef struct __emutls_control { + /* Must use gcc_word here, instead of size_t, to match GCC. When + gcc_word is larger than size_t, the upper extra bits are all + zeros. We can use variables of size_t to operate on size and + align. */ + gcc_word size; /* size of the object in bytes */ + gcc_word align; /* alignment of the object in bytes */ + union { + uintptr_t index; /* data[index-1] is the object address */ + void* address; /* object address, when in single thread env */ + } object; + void* value; /* null or non-zero initial value for the object */ +} __emutls_control; + /* Emulated TLS objects are always allocated at run-time. */ static __inline void *emutls_allocate_object(__emutls_control *control) { /* Use standard C types, check with gcc's emutls.o. 
*/ - typedef unsigned int gcc_pointer __attribute__((mode(pointer))); COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); @@ -93,45 +286,19 @@ static __inline void *emutls_allocate_object(__emutls_control *control) { return base; } -static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; - -static size_t emutls_num_object = 0; /* number of allocated TLS objects */ - -typedef struct emutls_address_array { - uintptr_t size; /* number of elements in the 'data' array */ - void* data[]; -} emutls_address_array; - -static pthread_key_t emutls_pthread_key; - -static void emutls_key_destructor(void* ptr) { - emutls_address_array* array = (emutls_address_array*)ptr; - uintptr_t i; - for (i = 0; i < array->size; ++i) { - if (array->data[i]) - emutls_memalign_free(array->data[i]); - } - free(ptr); -} - -static void emutls_init(void) { - if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) - abort(); -} /* Returns control->object.index; set index if not allocated yet. */ static __inline uintptr_t emutls_get_index(__emutls_control *control) { uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); if (!index) { - static pthread_once_t once = PTHREAD_ONCE_INIT; - pthread_once(&once, emutls_init); - pthread_mutex_lock(&emutls_mutex); + emutls_init_once(); + emutls_lock(); index = control->object.index; if (!index) { index = ++emutls_num_object; __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); } - pthread_mutex_unlock(&emutls_mutex); + emutls_unlock(); } return index; } @@ -142,7 +309,7 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array, if (array == NULL) abort(); array->size = size; - pthread_setspecific(emutls_pthread_key, (void*)array); + emutls_setspecific(array); } /* Returns the new 'data' array size, number of elements, @@ -156,22 +323,29 @@ static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { return ((index + 1 + 15) & ~((uintptr_t)15)) - 1; } +/* Returns the size in bytes required for an emutls_address_array with + * N number of elements for data field. + */ +static __inline uintptr_t emutls_asize(uintptr_t N) { + return N * sizeof(void *) + sizeof(emutls_address_array); +} + /* Returns the thread local emutls_address_array. * Extends its size if necessary to hold address at index. 
*/ static __inline emutls_address_array * emutls_get_address_array(uintptr_t index) { - emutls_address_array* array = pthread_getspecific(emutls_pthread_key); + emutls_address_array* array = emutls_getspecific(); if (array == NULL) { uintptr_t new_size = emutls_new_data_array_size(index); - array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array)); + array = (emutls_address_array*) malloc(emutls_asize(new_size)); if (array) memset(array->data, 0, new_size * sizeof(void*)); emutls_check_array_set_size(array, new_size); } else if (index > array->size) { uintptr_t orig_size = array->size; uintptr_t new_size = emutls_new_data_array_size(index); - array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array)); + array = (emutls_address_array*) realloc(array, emutls_asize(new_size)); if (array) memset(array->data + orig_size, 0, (new_size - orig_size) * sizeof(void*)); @@ -182,8 +356,8 @@ emutls_get_address_array(uintptr_t index) { void* __emutls_get_address(__emutls_control* control) { uintptr_t index = emutls_get_index(control); - emutls_address_array* array = emutls_get_address_array(index); - if (array->data[index - 1] == NULL) - array->data[index - 1] = emutls_allocate_object(control); - return array->data[index - 1]; + emutls_address_array* array = emutls_get_address_array(index--); + if (array->data[index] == NULL) + array->data[index] = emutls_allocate_object(control); + return array->data[index]; } diff --git a/contrib/compiler-rt/lib/lsan/lsan_allocator.h b/contrib/compiler-rt/lib/lsan/lsan_allocator.h index e5def17d4ee9..fad5adb01a7f 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_allocator.h +++ b/contrib/compiler-rt/lib/lsan/lsan_allocator.h @@ -59,7 +59,7 @@ typedef CompactSizeClassMap SizeClassMap; typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, sizeof(ChunkMetadata), SizeClassMap, kRegionSizeLog, ByteMap> PrimaryAllocator; -#elif defined(__x86_64__) +#elif defined(__x86_64__) || defined(__powerpc64__) struct AP64 { // Allocator64 parameters. Deliberately using a short name. static const uptr kSpaceBeg = 0x600000000000ULL; static const uptr kSpaceSize = 0x40000000000ULL; // 4T. diff --git a/contrib/compiler-rt/lib/lsan/lsan_common.cc b/contrib/compiler-rt/lib/lsan/lsan_common.cc index 200f16a594fa..a6b3453f5a0b 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common.cc @@ -70,12 +70,13 @@ static const char kSuppressionLeak[] = "leak"; static const char *kSuppressionTypes[] = { kSuppressionLeak }; static const char kStdSuppressions[] = #if SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT - // The actual string allocation happens here (for more details refer to the - // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT definition). - "leak:*_dl_map_object_deps*"; -#else - ""; + // For more details refer to the SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT + // definition. + "leak:*pthread_exit*\n" #endif // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT + // TLS leak in some glibc versions, described in + // https://sourceware.org/bugzilla/show_bug.cgi?id=12650. + "leak:*tls_get_addr*\n"; void InitializeSuppressions() { CHECK_EQ(nullptr, suppression_ctx); diff --git a/contrib/compiler-rt/lib/lsan/lsan_common.h b/contrib/compiler-rt/lib/lsan/lsan_common.h index 121b9c082983..beb31d6f40e4 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common.h +++ b/contrib/compiler-rt/lib/lsan/lsan_common.h @@ -32,7 +32,8 @@ // new architecture inside sanitizer library. 
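[editor's sketch] The emutls.c hunks above keep the convention documented in the __emutls_control comment: every TLS variable xyz gets a control record __emutls_v.xyz (and, if it has a non-zero initializer, a template __emutls_t.xyz), and each access goes through __emutls_get_address. A minimal illustration of that lowering, assuming -femulated-tls; the underscore-separated names below stand in for the dotted assembly-level symbols the compiler actually emits and are not part of this change:

__thread int counter = 1;
int bump(void) { return ++counter; }

/* Roughly what the compiler emits instead of direct TLS accesses: */
static int __emutls_t_counter = 1;                    /* initial-value template */
static __emutls_control __emutls_v_counter = {
    sizeof(int), __alignof__(int), { 0 }, &__emutls_t_counter };

int bump_lowered(void) {
  int *p = (int *)__emutls_get_address(&__emutls_v_counter);
  return ++*p;                                        /* per-thread copy */
}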
#if (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) && \ (SANITIZER_WORDSIZE == 64) && \ - (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__)) + (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ + defined(__powerpc64__)) #define CAN_SANITIZE_LEAKS 1 #elif defined(__i386__) && \ (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h index d6c66604ec86..b3729bf55dbb 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h @@ -180,6 +180,7 @@ struct SizeClassAllocator32LocalCache { uptr count; uptr max_count; uptr class_size; + uptr class_id_for_transfer_batch; void *batch[2 * TransferBatch::kMaxNumCached]; }; PerClass per_class_[kNumClasses]; @@ -188,32 +189,31 @@ struct SizeClassAllocator32LocalCache { void InitCache() { if (per_class_[1].max_count) return; + // TransferBatch class is declared in SizeClassAllocator. + uptr class_id_for_transfer_batch = + SizeClassMap::ClassID(sizeof(TransferBatch)); for (uptr i = 0; i < kNumClasses; i++) { PerClass *c = &per_class_[i]; - c->max_count = 2 * TransferBatch::MaxCached(i); + uptr max_cached = TransferBatch::MaxCached(i); + c->max_count = 2 * max_cached; c->class_size = Allocator::ClassIdToSize(i); + // We transfer chunks between central and thread-local free lists in + // batches. For small size classes we allocate batches separately. For + // large size classes we may use one of the chunks to store the batch. + // sizeof(TransferBatch) must be a power of 2 for more efficient + // allocation. + c->class_id_for_transfer_batch = (c->class_size < + TransferBatch::AllocationSizeRequiredForNElements(max_cached)) ? + class_id_for_transfer_batch : 0; } } - // TransferBatch class is declared in SizeClassAllocator. - // We transfer chunks between central and thread-local free lists in batches. - // For small size classes we allocate batches separately. - // For large size classes we may use one of the chunks to store the batch. - // sizeof(TransferBatch) must be a power of 2 for more efficient allocation. - static uptr SizeClassForTransferBatch(uptr class_id) { - if (Allocator::ClassIdToSize(class_id) < - TransferBatch::AllocationSizeRequiredForNElements( - TransferBatch::MaxCached(class_id))) - return SizeClassMap::ClassID(sizeof(TransferBatch)); - return 0; - } - // Returns a TransferBatch suitable for class_id. // For small size classes allocates the batch from the allocator. // For large size classes simply returns b. TransferBatch *CreateBatch(uptr class_id, SizeClassAllocator *allocator, TransferBatch *b) { - if (uptr batch_class_id = SizeClassForTransferBatch(class_id)) + if (uptr batch_class_id = per_class_[class_id].class_id_for_transfer_batch) return (TransferBatch*)Allocate(allocator, batch_class_id); return b; } @@ -223,7 +223,7 @@ struct SizeClassAllocator32LocalCache { // Does notthing for large size classes. 
void DestroyBatch(uptr class_id, SizeClassAllocator *allocator, TransferBatch *b) { - if (uptr batch_class_id = SizeClassForTransferBatch(class_id)) + if (uptr batch_class_id = per_class_[class_id].class_id_for_transfer_batch) Deallocate(allocator, batch_class_id, b); } diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index d1c793c551f7..4fe1ac8f9da7 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -304,7 +304,7 @@ INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) { INTERCEPTOR(char*, textdomain, const char *domainname) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, textdomain, domainname); - COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0); + if (domainname) COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0); char *domain = REAL(textdomain)(domainname); if (domain) { COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, REAL(strlen)(domain) + 1); @@ -3330,7 +3330,7 @@ INTERCEPTOR(char *, strerror, int errnum) { // * GNU version returns message pointer, which points to either buf or some // static storage. #if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \ - SANITIZER_MAC + SANITIZER_MAC || SANITIZER_ANDROID // POSIX version. Spec is not clear on whether buf is NULL-terminated. // At least on OSX, buf contents are valid even when the call fails. INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) { diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 5cbd78d3c1bf..3ffa76e3430f 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -87,7 +87,7 @@ namespace __sanitizer { #elif defined(__mips__) const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) : - FIRST_32_SECOND_64(144, 216); + FIRST_32_SECOND_64(160, 216); const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; diff --git a/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp b/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp index 9812fc0f59f8..e89e09223ff8 100644 --- a/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/contrib/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -460,6 +460,38 @@ struct ScudoAllocator { return UserPtr; } + // Place a chunk in the quarantine. In the event of a zero-sized quarantine, + // we directly deallocate the chunk, otherwise the flow would lead to the + // chunk being checksummed twice, once before Put and once in Recycle, with + // no additional security value. 
+ void quarantineOrDeallocateChunk(ScudoChunk *Chunk, UnpackedHeader *Header, + uptr Size) { + bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0); + if (BypassQuarantine) { + Chunk->eraseHeader(); + void *Ptr = Chunk->getAllocBeg(Header); + if (LIKELY(!ThreadTornDown)) { + getBackendAllocator().Deallocate(&Cache, Ptr); + } else { + SpinMutexLock Lock(&FallbackMutex); + getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr); + } + } else { + UnpackedHeader NewHeader = *Header; + NewHeader.State = ChunkQuarantine; + Chunk->compareExchangeHeader(&NewHeader, Header); + if (LIKELY(!ThreadTornDown)) { + AllocatorQuarantine.Put(&ThreadQuarantineCache, + QuarantineCallback(&Cache), Chunk, Size); + } else { + SpinMutexLock l(&FallbackMutex); + AllocatorQuarantine.Put(&FallbackQuarantineCache, + QuarantineCallback(&FallbackAllocatorCache), + Chunk, Size); + } + } + } + // Deallocates a Chunk, which means adding it to the delayed free list (or // Quarantine). void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) { @@ -499,24 +531,12 @@ struct ScudoAllocator { } } - UnpackedHeader NewHeader = OldHeader; - NewHeader.State = ChunkQuarantine; - Chunk->compareExchangeHeader(&NewHeader, &OldHeader); - // If a small memory amount was allocated with a larger alignment, we want // to take that into account. Otherwise the Quarantine would be filled with - // tiny chunks, taking a lot of VA memory. This an approximation of the + // tiny chunks, taking a lot of VA memory. This is an approximation of the // usable size, that allows us to not call GetActuallyAllocatedSize. uptr LiableSize = Size + (OldHeader.Offset << MinAlignment); - if (LIKELY(!ThreadTornDown)) { - AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, LiableSize); - } else { - SpinMutexLock l(&FallbackMutex); - AllocatorQuarantine.Put(&FallbackQuarantineCache, - QuarantineCallback(&FallbackAllocatorCache), - Chunk, LiableSize); - } + quarantineOrDeallocateChunk(Chunk, &OldHeader, LiableSize); } // Reallocates a chunk. We can save on a new allocation if the new requested @@ -541,11 +561,11 @@ struct ScudoAllocator { OldPtr); } uptr UsableSize = Chunk->getUsableSize(&OldHeader); - UnpackedHeader NewHeader = OldHeader; // The new size still fits in the current chunk, and the size difference // is reasonable. if (NewSize <= UsableSize && (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) { + UnpackedHeader NewHeader = OldHeader; NewHeader.SizeOrUnusedBytes = OldHeader.FromPrimary ? NewSize : UsableSize - NewSize; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); @@ -558,17 +578,7 @@ struct ScudoAllocator { uptr OldSize = OldHeader.FromPrimary ? 
OldHeader.SizeOrUnusedBytes : UsableSize - OldHeader.SizeOrUnusedBytes; memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); - NewHeader.State = ChunkQuarantine; - Chunk->compareExchangeHeader(&NewHeader, &OldHeader); - if (LIKELY(!ThreadTornDown)) { - AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, UsableSize); - } else { - SpinMutexLock l(&FallbackMutex); - AllocatorQuarantine.Put(&FallbackQuarantineCache, - QuarantineCallback(&FallbackAllocatorCache), - Chunk, UsableSize); - } + quarantineOrDeallocateChunk(Chunk, &OldHeader, UsableSize); } return NewPtr; } diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_external.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_external.cc index dc8ec62322ce..88468e406651 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_external.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_external.cc @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "tsan_rtl.h" +#include "tsan_interceptors.h" namespace __tsan { @@ -29,6 +30,20 @@ const char *GetObjectTypeFromTag(uptr tag) { return registered_tags[tag]; } +typedef void(*AccessFunc)(ThreadState *, uptr, uptr, int); +void ExternalAccess(void *addr, void *caller_pc, void *tag, AccessFunc access) { + CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed)); + ThreadState *thr = cur_thread(); + thr->external_tag = (uptr)tag; + if (caller_pc) FuncEntry(thr, (uptr)caller_pc); + bool in_ignored_lib; + if (!caller_pc || !libignore()->IsIgnored((uptr)caller_pc, &in_ignored_lib)) { + access(thr, CALLERPC, (uptr)addr, kSizeLog1); + } + if (caller_pc) FuncExit(thr); + thr->external_tag = 0; +} + extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void *__tsan_external_register_tag(const char *object_type) { @@ -54,24 +69,12 @@ void __tsan_external_assign_tag(void *addr, void *tag) { SANITIZER_INTERFACE_ATTRIBUTE void __tsan_external_read(void *addr, void *caller_pc, void *tag) { - CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed)); - ThreadState *thr = cur_thread(); - thr->external_tag = (uptr)tag; - FuncEntry(thr, (uptr)caller_pc); - MemoryRead(thr, CALLERPC, (uptr)addr, kSizeLog8); - FuncExit(thr); - thr->external_tag = 0; + ExternalAccess(addr, caller_pc, tag, MemoryRead); } SANITIZER_INTERFACE_ATTRIBUTE void __tsan_external_write(void *addr, void *caller_pc, void *tag) { - CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed)); - ThreadState *thr = cur_thread(); - thr->external_tag = (uptr)tag; - FuncEntry(thr, (uptr)caller_pc); - MemoryWrite(thr, CALLERPC, (uptr)addr, kSizeLog8); - FuncExit(thr); - thr->external_tag = 0; + ExternalAccess(addr, caller_pc, tag, MemoryWrite); } } // extern "C" diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc index d0fd91aec234..334cc326daf6 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc @@ -210,7 +210,7 @@ struct ThreadSignalContext { // The object is 64-byte aligned, because we want hot data to be located in // a single cache line if possible (it's accessed in every interceptor). 
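[editor's sketch] A hedged usage example for the external race-detection entry points declared in tsan_interface.h and reworked into ExternalAccess in tsan_external.cc above. Only the __tsan_external_* calls and __builtin_return_address come from this change; the Table class and its lookup/store helpers are hypothetical:

// Non-instrumented library annotating the logical reads/writes of its objects.
static void *kTableTag;  // registered once, e.g. from a library init hook

void LibraryInit() {
  kTableTag = __tsan_external_register_tag("MyLibrary::Table");
}

int Table::Get(int key) {
  __tsan_external_read(this, __builtin_return_address(0), kTableTag);
  return lookup(key);    // internal, unsynchronized read
}

void Table::Put(int key, int value) {
  __tsan_external_write(this, __builtin_return_address(0), kTableTag);
  store(key, value);     // internal, unsynchronized write
}

Optionally, __tsan_external_assign_tag(table, kTableTag) marks the heap object itself so reports describe it by type rather than as a plain heap block.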
static ALIGNED(64) char libignore_placeholder[sizeof(LibIgnore)]; -static LibIgnore *libignore() { +LibIgnore *libignore() { return reinterpret_cast(&libignore_placeholder[0]); } @@ -269,6 +269,7 @@ ScopedInterceptor::~ScopedInterceptor() { void ScopedInterceptor::EnableIgnores() { if (ignoring_) { ThreadIgnoreBegin(thr_, pc_, false); + if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports++; if (in_ignored_lib_) { DCHECK(!thr_->in_ignored_lib); thr_->in_ignored_lib = true; @@ -279,6 +280,7 @@ void ScopedInterceptor::EnableIgnores() { void ScopedInterceptor::DisableIgnores() { if (ignoring_) { ThreadIgnoreEnd(thr_, pc_); + if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports--; if (in_ignored_lib_) { DCHECK(thr_->in_ignored_lib); thr_->in_ignored_lib = false; diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.h b/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.h index 72534f4a24a6..de47466501da 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.h +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_interceptors.h @@ -19,6 +19,8 @@ class ScopedInterceptor { bool ignoring_; }; +LibIgnore *libignore(); + } // namespace __tsan #define SCOPED_INTERCEPTOR_RAW(func, ...) \ diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_report.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_report.cc index 7de00840cdbc..af5fe61761d7 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_report.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_report.cc @@ -169,7 +169,7 @@ static void PrintMop(const ReportMop *mop, bool first) { MopDesc(first, mop->write, mop->atomic), mop->size, (void *)mop->addr, thread_name(thrbuf, mop->tid)); } else { - Printf(" %s access of object %s at %p by %s", + Printf(" %s access of %s at %p by %s", ExternalMopDesc(first, mop->write), object_type, (void *)mop->addr, thread_name(thrbuf, mop->tid)); } @@ -202,7 +202,7 @@ static void PrintLocation(const ReportLocation *loc) { loc->heap_chunk_size, loc->heap_chunk_start, thread_name(thrbuf, loc->tid)); } else { - Printf(" Location is %s object of size %zu at %p allocated by %s:\n", + Printf(" Location is %s of size %zu at %p allocated by %s:\n", object_type, loc->heap_chunk_size, loc->heap_chunk_start, thread_name(thrbuf, loc->tid)); } diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.h index 3481c31ebb1c..09c97a3a4f3d 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -381,6 +381,7 @@ struct ThreadState { // for better performance. int ignore_reads_and_writes; int ignore_sync; + int suppress_reports; // Go does not support ignores. 
#if !SANITIZER_GO IgnoreSet mop_ignore_set; diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cc index 31b9e97898b0..5cd93a184ce7 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cc @@ -500,7 +500,7 @@ static void AddRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2], } bool OutputReport(ThreadState *thr, const ScopedReport &srep) { - if (!flags()->report_bugs) + if (!flags()->report_bugs || thr->suppress_reports) return false; atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime()); const ReportDesc *rep = srep.GetReport(); diff --git a/contrib/libc++/include/atomic b/contrib/libc++/include/atomic index a17bdce1cc6f..f55e28ff5e92 100644 --- a/contrib/libc++/include/atomic +++ b/contrib/libc++/include/atomic @@ -861,16 +861,29 @@ kill_dependency(_Tp __y) _NOEXCEPT return __y; } -#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE -#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE -#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE -#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE -#define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE -#define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE -#define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE -#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE -#define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) +# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE +# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE +#else +# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +# define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#endif // general atomic diff --git a/contrib/libc++/include/math.h b/contrib/libc++/include/math.h index 79ebf2f119d7..8c30ba85d22c 100644 --- a/contrib/libc++/include/math.h +++ b/contrib/libc++/include/math.h @@ -307,6 +307,7 @@ long double truncl(long double x); extern "C++" { #include +#include // signbit @@ -324,22 +325,50 @@ __libcpp_signbit(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if::value, bool>::type signbit(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_signbit((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY 
+typename std::enable_if< + std::is_integral<_A1>::value && std::is_signed<_A1>::value, bool>::type +signbit(_A1 __lcpp_x) _NOEXCEPT +{ return __lcpp_x < 0; } + +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if< + std::is_integral<_A1>::value && !std::is_signed<_A1>::value, bool>::type +signbit(_A1) _NOEXCEPT +{ return false; } + #elif defined(_LIBCPP_MSVCRT) template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if::value, bool>::type signbit(_A1 __lcpp_x) _NOEXCEPT { return ::signbit(static_cast::type>(__lcpp_x)); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if< + std::is_integral<_A1>::value && std::is_signed<_A1>::value, bool>::type +signbit(_A1 __lcpp_x) _NOEXCEPT +{ return __lcpp_x < 0; } + +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if< + std::is_integral<_A1>::value && !std::is_signed<_A1>::value, bool>::type +signbit(_A1) _NOEXCEPT +{ return false; } + #endif // signbit // fpclassify @@ -358,22 +387,34 @@ __libcpp_fpclassify(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, int>::type +typename std::enable_if::value, int>::type fpclassify(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_fpclassify((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if::value, int>::type +fpclassify(_A1 __lcpp_x) _NOEXCEPT +{ return __lcpp_x == 0 ? FP_ZERO : FP_NORMAL; } + #elif defined(_LIBCPP_MSVCRT) template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, int>::type +typename std::enable_if::value, bool>::type fpclassify(_A1 __lcpp_x) _NOEXCEPT { return ::fpclassify(static_cast::type>(__lcpp_x)); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if::value, int>::type +fpclassify(_A1 __lcpp_x) _NOEXCEPT +{ return __lcpp_x == 0 ? 
FP_ZERO : FP_NORMAL; } + #endif // fpclassify // isfinite @@ -392,12 +433,22 @@ __libcpp_isfinite(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if< + std::is_arithmetic<_A1>::value && std::numeric_limits<_A1>::has_infinity, + bool>::type isfinite(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_isfinite((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if< + std::is_arithmetic<_A1>::value && !std::numeric_limits<_A1>::has_infinity, + bool>::type +isfinite(_A1) _NOEXCEPT +{ return true; } + #endif // isfinite // isinf @@ -416,12 +467,22 @@ __libcpp_isinf(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if< + std::is_arithmetic<_A1>::value && std::numeric_limits<_A1>::has_infinity, + bool>::type isinf(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_isinf((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if< + std::is_arithmetic<_A1>::value && !std::numeric_limits<_A1>::has_infinity, + bool>::type +isinf(_A1) _NOEXCEPT +{ return false; } + #endif // isinf // isnan @@ -440,12 +501,18 @@ __libcpp_isnan(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if::value, bool>::type isnan(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_isnan((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if::value, bool>::type +isnan(_A1) _NOEXCEPT +{ return false; } + #endif // isnan // isnormal @@ -464,12 +531,18 @@ __libcpp_isnormal(_A1 __lcpp_x) _NOEXCEPT template inline _LIBCPP_INLINE_VISIBILITY -typename std::enable_if::value, bool>::type +typename std::enable_if::value, bool>::type isnormal(_A1 __lcpp_x) _NOEXCEPT { return __libcpp_isnormal((typename std::__promote<_A1>::type)__lcpp_x); } +template +inline _LIBCPP_INLINE_VISIBILITY +typename std::enable_if::value, bool>::type +isnormal(_A1 __lcpp_x) _NOEXCEPT +{ return __lcpp_x != 0; } + #endif // isnormal // isgreater diff --git a/contrib/libc++/include/mutex b/contrib/libc++/include/mutex index e92baa51ae75..0b25614e8ead 100644 --- a/contrib/libc++/include/mutex +++ b/contrib/libc++/include/mutex @@ -685,7 +685,7 @@ inline _LIBCPP_INLINE_VISIBILITY void call_once(once_flag& __flag, const _Callable& __func) { - if (__flag.__state_ != ~0ul) + if (__libcpp_acquire_load(&__flag.__state_) != ~0ul) { __call_once_param __p(__func); __call_once(__flag.__state_, &__p, &__call_once_proxy); diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h index e7e5036e6930..bef6efde1f01 100644 --- a/contrib/llvm/include/llvm/ADT/APFloat.h +++ b/contrib/llvm/include/llvm/ADT/APFloat.h @@ -397,6 +397,12 @@ class IEEEFloat final : public APFloatBase { /// consider inserting before falling back to scientific /// notation. 0 means to always use scientific notation. /// + /// \param TruncateZero Indicate whether to remove the trailing zero in + /// fraction part or not. Also setting this parameter to false forcing + /// producing of output more similar to default printf behavior. + /// Specifically the lower e is used as exponent delimiter and exponent + /// always contains no less than two digits. 
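[editor's sketch] The math.h hunks above add integral overloads for the classification helpers, so calls like signbit(-3) no longer require a floating-point promotion. A small sketch of the intended effect, inferred from the overloads shown rather than from separate documentation:

#include <math.h>   // libc++'s math.h, which injects these overloads

void classify_ints() {
  bool a = signbit(-3);    // true: signed integral overload returns x < 0
  bool b = signbit(3u);    // false: unsigned overload is constantly false
  int  c = fpclassify(0);  // FP_ZERO for a zero integral argument
  int  d = fpclassify(42); // FP_NORMAL for any non-zero integral argument
  bool e = isnan(7);       // false: integers are never NaN
  bool f = isfinite(7);    // true: integral types have no infinity
  (void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
}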
+ /// /// Number Precision MaxPadding Result /// ------ --------- ---------- ------ /// 1.01E+4 5 2 10100 @@ -406,7 +412,7 @@ class IEEEFloat final : public APFloatBase { /// 1.01E-2 4 2 0.0101 /// 1.01E-2 4 1 1.01E-2 void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const; + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const; /// If this value has an exact multiplicative inverse, store it in inv and /// return true. @@ -649,7 +655,7 @@ class DoubleAPFloat final : public APFloatBase { bool isInteger() const; void toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const; + unsigned FormatMaxPadding, bool TruncateZero = true) const; bool getExactInverse(APFloat *inv) const; @@ -1144,9 +1150,9 @@ class APFloat : public APFloatBase { APFloat &operator=(APFloat &&RHS) = default; void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const { + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const { APFLOAT_DISPATCH_ON_SEMANTICS( - toString(Str, FormatPrecision, FormatMaxPadding)); + toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero)); } void print(raw_ostream &) const; diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h index ceb623d34531..d0104c3f0fa9 100644 --- a/contrib/llvm/include/llvm/ADT/APInt.h +++ b/contrib/llvm/include/llvm/ADT/APInt.h @@ -78,6 +78,8 @@ class LLVM_NODISCARD APInt { APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT }; + static const WordType WORD_MAX = ~WordType(0); + private: /// This union is used to store the integer value. When the /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. @@ -90,6 +92,8 @@ class LLVM_NODISCARD APInt { friend struct DenseMapAPIntKeyInfo; + friend class APSInt; + /// \brief Fast internal constructor /// /// This constructor is used only internally for speed of construction of @@ -134,15 +138,10 @@ class LLVM_NODISCARD APInt { /// zero'd out. APInt &clearUnusedBits() { // Compute how many bits are used in the final word - unsigned wordBits = BitWidth % APINT_BITS_PER_WORD; - if (wordBits == 0) - // If all bits are used, we want to leave the value alone. This also - // avoids the undefined behavior of >> when the shift is the same size as - // the word size (64). - return *this; + unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1; // Mask out the high bits. - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - wordBits); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits); if (isSingleWord()) VAL &= mask; else @@ -194,6 +193,9 @@ class LLVM_NODISCARD APInt { /// out-of-line slow case for lshr. void lshrSlowCase(unsigned ShiftAmt); + /// out-of-line slow case for ashr. + void ashrSlowCase(unsigned ShiftAmt); + /// out-of-line slow case for operator= void AssignSlowCase(const APInt &RHS); @@ -230,6 +232,14 @@ class LLVM_NODISCARD APInt { /// out-of-line slow case for operator^=. void XorAssignSlowCase(const APInt& RHS); + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; + public: /// \name Constructors /// @{ @@ -363,7 +373,7 @@ class LLVM_NODISCARD APInt { /// This checks to see if the value has all bits of the APInt are set or not. 
bool isAllOnesValue() const { if (isSingleWord()) - return VAL == UINT64_MAX >> (APINT_BITS_PER_WORD - BitWidth); + return VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countPopulationSlowCase() == BitWidth; } @@ -445,7 +455,7 @@ class LLVM_NODISCARD APInt { assert(numBits != 0 && "numBits must be non-zero"); assert(numBits <= BitWidth && "numBits out of range"); if (isSingleWord()) - return VAL == (UINT64_MAX >> (APINT_BITS_PER_WORD - numBits)); + return VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits)); unsigned Ones = countTrailingOnesSlowCase(); return (numBits == Ones) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); @@ -509,7 +519,7 @@ class LLVM_NODISCARD APInt { /// /// \returns the all-ones value for an APInt of the specified bit-width. static APInt getAllOnesValue(unsigned numBits) { - return APInt(numBits, UINT64_MAX, true); + return APInt(numBits, WORD_MAX, true); } /// \brief Get the '0' value. @@ -886,7 +896,26 @@ class LLVM_NODISCARD APInt { /// \brief Arithmetic right-shift function. /// /// Arithmetic right-shift this APInt by shiftAmt. - APInt ashr(unsigned shiftAmt) const; + APInt ashr(unsigned ShiftAmt) const { + APInt R(*this); + R.ashrInPlace(ShiftAmt); + return R; + } + + /// Arithmetic right-shift this APInt by ShiftAmt in place. + void ashrInPlace(unsigned ShiftAmt) { + assert(ShiftAmt <= BitWidth && "Invalid shift amount"); + if (isSingleWord()) { + int64_t SExtVAL = SignExtend64(VAL, BitWidth); + if (ShiftAmt == BitWidth) + VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. + else + VAL = SExtVAL >> ShiftAmt; + clearUnusedBits(); + return; + } + ashrSlowCase(ShiftAmt); + } /// \brief Logical right-shift function. /// @@ -928,7 +957,14 @@ class LLVM_NODISCARD APInt { /// \brief Arithmetic right-shift function. /// /// Arithmetic right-shift this APInt by shiftAmt. - APInt ashr(const APInt &shiftAmt) const; + APInt ashr(const APInt &ShiftAmt) const { + APInt R(*this); + R.ashrInPlace(ShiftAmt); + return R; + } + + /// Arithmetic right-shift this APInt by shiftAmt in place. + void ashrInPlace(const APInt &shiftAmt); /// \brief Logical right-shift function. /// @@ -1079,7 +1115,7 @@ class LLVM_NODISCARD APInt { /// the validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered unsigned. - bool ult(const APInt &RHS) const LLVM_READONLY; + bool ult(const APInt &RHS) const { return compare(RHS) < 0; } /// \brief Unsigned less than comparison /// @@ -1098,7 +1134,7 @@ class LLVM_NODISCARD APInt { /// validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered signed. - bool slt(const APInt &RHS) const LLVM_READONLY; + bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } /// \brief Signed less than comparison /// @@ -1117,7 +1153,7 @@ class LLVM_NODISCARD APInt { /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered unsigned. - bool ule(const APInt &RHS) const { return ult(RHS) || eq(RHS); } + bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } /// \brief Unsigned less or equal comparison /// @@ -1133,7 +1169,7 @@ class LLVM_NODISCARD APInt { /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered signed. 
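[editor's sketch] A short illustration of the in-place arithmetic shift introduced above; values are chosen only to show the sign-fill behavior, including the shift-by-bit-width case the new code handles explicitly:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

void ashr_demo() {
  APInt X(8, 0x80);         // 1000'0000, sign bit set
  X.ashrInPlace(4);         // sign-fills from the top: 1111'1000
  assert(X.getZExtValue() == 0xF8);

  APInt Y(8, 0x80);
  assert(Y.ashr(8) == APInt(8, 0xFF));  // full-width shift propagates the sign bit
}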
- bool sle(const APInt &RHS) const { return slt(RHS) || eq(RHS); } + bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } /// \brief Signed less or equal comparison /// @@ -1149,7 +1185,7 @@ class LLVM_NODISCARD APInt { /// the validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered unsigned. - bool ugt(const APInt &RHS) const { return !ult(RHS) && !eq(RHS); } + bool ugt(const APInt &RHS) const { return !ule(RHS); } /// \brief Unsigned greater than comparison /// @@ -1168,7 +1204,7 @@ class LLVM_NODISCARD APInt { /// validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered signed. - bool sgt(const APInt &RHS) const { return !slt(RHS) && !eq(RHS); } + bool sgt(const APInt &RHS) const { return !sle(RHS); } /// \brief Signed greater than comparison /// @@ -1286,7 +1322,7 @@ class LLVM_NODISCARD APInt { /// \brief Set every bit to 1. void setAllBits() { if (isSingleWord()) - VAL = UINT64_MAX; + VAL = WORD_MAX; else // Set all the bits in all the words. memset(pVal, -1, getNumWords() * APINT_WORD_SIZE); @@ -1316,7 +1352,7 @@ class LLVM_NODISCARD APInt { return; } if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); mask <<= loBit; if (isSingleWord()) VAL |= mask; @@ -1358,7 +1394,7 @@ class LLVM_NODISCARD APInt { /// \brief Toggle every bit to its opposite value. void flipAllBits() { if (isSingleWord()) { - VAL ^= UINT64_MAX; + VAL ^= WORD_MAX; clearUnusedBits(); } else { flipAllBitsSlowCase(); @@ -1653,7 +1689,7 @@ class LLVM_NODISCARD APInt { /// referencing 2 in a space where 2 does no exist. unsigned nearestLogBase2() const { // Special case when we have a bitwidth of 1. If VAL is 1, then we - // get 0. If VAL is 0, we get UINT64_MAX which gets truncated to + // get 0. If VAL is 0, we get WORD_MAX which gets truncated to // UINT32_MAX. if (BitWidth == 1) return VAL - 1; diff --git a/contrib/llvm/include/llvm/ADT/APSInt.h b/contrib/llvm/include/llvm/ADT/APSInt.h index 5b6dfa4a4b64..dabbf3314bd0 100644 --- a/contrib/llvm/include/llvm/ADT/APSInt.h +++ b/contrib/llvm/include/llvm/ADT/APSInt.h @@ -125,7 +125,10 @@ class LLVM_NODISCARD APSInt : public APInt { return IsUnsigned ? APSInt(lshr(Amt), true) : APSInt(ashr(Amt), false); } APSInt& operator>>=(unsigned Amt) { - *this = *this >> Amt; + if (IsUnsigned) + lshrInPlace(Amt); + else + ashrInPlace(Amt); return *this; } @@ -179,7 +182,7 @@ class LLVM_NODISCARD APSInt : public APInt { return APSInt(static_cast(*this) << Bits, IsUnsigned); } APSInt& operator<<=(unsigned Amt) { - *this = *this << Amt; + static_cast(*this) <<= Amt; return *this; } @@ -285,12 +288,12 @@ class LLVM_NODISCARD APSInt : public APInt { /// \brief Compare underlying values of two numbers. static int compareValues(const APSInt &I1, const APSInt &I2) { if (I1.getBitWidth() == I2.getBitWidth() && I1.isSigned() == I2.isSigned()) - return I1 == I2 ? 0 : I1 > I2 ? 1 : -1; + return I1.IsUnsigned ? I1.compare(I2) : I1.compareSigned(I2); // Check for a bit-width mismatch. if (I1.getBitWidth() > I2.getBitWidth()) return compareValues(I1, I2.extend(I1.getBitWidth())); - else if (I2.getBitWidth() > I1.getBitWidth()) + if (I2.getBitWidth() > I1.getBitWidth()) return compareValues(I1.extend(I2.getBitWidth()), I2); // We have a signedness mismatch. 
Check for negative values and do an @@ -305,7 +308,7 @@ class LLVM_NODISCARD APSInt : public APInt { return 1; } - return I1.eq(I2) ? 0 : I1.ugt(I2) ? 1 : -1; + return I1.compare(I2); } static APSInt get(int64_t X) { return APSInt(APInt(64, X), false); } diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h index e48c023ae7df..5aa101591e6e 100644 --- a/contrib/llvm/include/llvm/ADT/BitVector.h +++ b/contrib/llvm/include/llvm/ADT/BitVector.h @@ -15,7 +15,6 @@ #define LLVM_ADT_BITVECTOR_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/MathExtras.h" #include #include @@ -35,9 +34,8 @@ class BitVector { static_assert(BITWORD_SIZE == 64 || BITWORD_SIZE == 32, "Unsupported word size"); - BitWord *Bits; // Actual bits. - unsigned Size; // Size of bitvector in bits. - unsigned Capacity; // Number of BitWords allocated in the Bits array. + MutableArrayRef Bits; // Actual bits. + unsigned Size; // Size of bitvector in bits. public: typedef unsigned size_type; @@ -77,16 +75,14 @@ class BitVector { /// BitVector default ctor - Creates an empty bitvector. - BitVector() : Size(0), Capacity(0) { - Bits = nullptr; - } + BitVector() : Size(0) {} /// BitVector ctor - Creates a bitvector of specified number of bits. All /// bits are initialized to the specified value. explicit BitVector(unsigned s, bool t = false) : Size(s) { - Capacity = NumBitWords(s); - Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - init_words(Bits, Capacity, t); + size_t Capacity = NumBitWords(s); + Bits = allocate(Capacity); + init_words(Bits, t); if (t) clear_unused_bits(); } @@ -94,25 +90,21 @@ class BitVector { /// BitVector copy ctor. BitVector(const BitVector &RHS) : Size(RHS.size()) { if (Size == 0) { - Bits = nullptr; - Capacity = 0; + Bits = MutableArrayRef(); return; } - Capacity = NumBitWords(RHS.size()); - Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord)); + size_t Capacity = NumBitWords(RHS.size()); + Bits = allocate(Capacity); + std::memcpy(Bits.data(), RHS.Bits.data(), Capacity * sizeof(BitWord)); } - BitVector(BitVector &&RHS) - : Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity) { - RHS.Bits = nullptr; - RHS.Size = RHS.Capacity = 0; + BitVector(BitVector &&RHS) : Bits(RHS.Bits), Size(RHS.Size) { + RHS.Bits = MutableArrayRef(); + RHS.Size = 0; } - ~BitVector() { - std::free(Bits); - } + ~BitVector() { std::free(Bits.data()); } /// empty - Tests whether there are no bits in this bitvector. bool empty() const { return Size == 0; } @@ -163,6 +155,22 @@ class BitVector { return -1; } + /// find_last - Returns the index of the last set bit, -1 if none of the bits + /// are set. + int find_last() const { + if (Size == 0) + return -1; + + unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + while (i > 0 && Bits[i] == BitWord(0)) + --i; + + return int((i + 1) * BITWORD_SIZE - countLeadingZeros(Bits[i])) - 1; + } + /// find_first_unset - Returns the index of the first unset bit, -1 if all /// of the bits are set. int find_first_unset() const { @@ -174,6 +182,30 @@ class BitVector { return -1; } + /// find_last_unset - Returns the index of the last unset bit, -1 if all of + /// the bits are set. 
+ int find_last_unset() const { + if (Size == 0) + return -1; + + const unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + BitWord W = Bits[i]; + + // The last word in the BitVector has some unused bits, so we need to set + // them all to 1 first. Set them all to 1 so they don't get treated as + // valid unset bits. + unsigned UnusedCount = BITWORD_SIZE - size() % BITWORD_SIZE; + W |= maskLeadingOnes(UnusedCount); + + while (W == ~BitWord(0) && --i > 0) + W = Bits[i]; + + return int((i + 1) * BITWORD_SIZE - countLeadingOnes(W)) - 1; + } + /// find_next - Returns the index of the next set bit following the /// "Prev" bit. Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { @@ -228,10 +260,10 @@ class BitVector { /// resize - Grow or shrink the bitvector. void resize(unsigned N, bool t = false) { - if (N > Capacity * BITWORD_SIZE) { - unsigned OldCapacity = Capacity; + if (N > getBitCapacity()) { + unsigned OldCapacity = Bits.size(); grow(N); - init_words(&Bits[OldCapacity], (Capacity-OldCapacity), t); + init_words(Bits.drop_front(OldCapacity), t); } // Set any old unused bits that are now included in the BitVector. This @@ -248,19 +280,19 @@ class BitVector { } void reserve(unsigned N) { - if (N > Capacity * BITWORD_SIZE) + if (N > getBitCapacity()) grow(N); } // Set, reset, flip BitVector &set() { - init_words(Bits, Capacity, true); + init_words(Bits, true); clear_unused_bits(); return *this; } BitVector &set(unsigned Idx) { - assert(Bits && "Bits never allocated"); + assert(Bits.data() && "Bits never allocated"); Bits[Idx / BITWORD_SIZE] |= BitWord(1) << (Idx % BITWORD_SIZE); return *this; } @@ -295,7 +327,7 @@ class BitVector { } BitVector &reset() { - init_words(Bits, Capacity, false); + init_words(Bits, false); return *this; } @@ -562,21 +594,21 @@ class BitVector { Size = RHS.size(); unsigned RHSWords = NumBitWords(Size); - if (Size <= Capacity * BITWORD_SIZE) { + if (Size <= getBitCapacity()) { if (Size) - std::memcpy(Bits, RHS.Bits, RHSWords * sizeof(BitWord)); + std::memcpy(Bits.data(), RHS.Bits.data(), RHSWords * sizeof(BitWord)); clear_unused_bits(); return *this; } // Grow the bitvector to have enough elements. - Capacity = RHSWords; - assert(Capacity > 0 && "negative capacity?"); - BitWord *NewBits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - std::memcpy(NewBits, RHS.Bits, Capacity * sizeof(BitWord)); + unsigned NewCapacity = RHSWords; + assert(NewCapacity > 0 && "negative capacity?"); + auto NewBits = allocate(NewCapacity); + std::memcpy(NewBits.data(), RHS.Bits.data(), NewCapacity * sizeof(BitWord)); // Destroy the old bits. 
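[editor's sketch] A quick demonstration of the reverse-search helpers added to BitVector above (SmallBitVector gains matching forwarding methods in the next file):

#include "llvm/ADT/BitVector.h"
#include <cassert>
using namespace llvm;

void find_last_demo() {
  BitVector BV(100);                    // 100 bits, all initially clear
  BV.set(3);
  BV.set(71);
  assert(BV.find_last() == 71);         // highest set bit
  assert(BV.find_last_unset() == 99);   // highest clear bit
  BV.set();                             // set everything
  assert(BV.find_last_unset() == -1);   // no clear bits remain
}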
- std::free(Bits); + std::free(Bits.data()); Bits = NewBits; return *this; @@ -585,13 +617,12 @@ class BitVector { const BitVector &operator=(BitVector &&RHS) { if (this == &RHS) return *this; - std::free(Bits); + std::free(Bits.data()); Bits = RHS.Bits; Size = RHS.Size; - Capacity = RHS.Capacity; - RHS.Bits = nullptr; - RHS.Size = RHS.Capacity = 0; + RHS.Bits = MutableArrayRef(); + RHS.Size = 0; return *this; } @@ -599,7 +630,6 @@ class BitVector { void swap(BitVector &RHS) { std::swap(Bits, RHS.Bits); std::swap(Size, RHS.Size); - std::swap(Capacity, RHS.Capacity); } //===--------------------------------------------------------------------===// @@ -659,14 +689,14 @@ class BitVector { uint32_t NumWords = NumBitWords(Size); - auto Src = ArrayRef(Bits, NumWords).drop_back(Count); - auto Dest = MutableArrayRef(Bits, NumWords).drop_front(Count); + auto Src = Bits.take_front(NumWords).drop_back(Count); + auto Dest = Bits.take_front(NumWords).drop_front(Count); // Since we always move Word-sized chunks of data with src and dest both // aligned to a word-boundary, we don't need to worry about endianness // here. std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); - std::memset(Bits, 0, Count * sizeof(BitWord)); + std::memset(Bits.data(), 0, Count * sizeof(BitWord)); clear_unused_bits(); } @@ -679,14 +709,19 @@ class BitVector { uint32_t NumWords = NumBitWords(Size); - auto Src = ArrayRef(Bits, NumWords).drop_front(Count); - auto Dest = MutableArrayRef(Bits, NumWords).drop_back(Count); + auto Src = Bits.take_front(NumWords).drop_front(Count); + auto Dest = Bits.take_front(NumWords).drop_back(Count); assert(Dest.size() == Src.size()); std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); std::memset(Dest.end(), 0, Count * sizeof(BitWord)); } + MutableArrayRef allocate(size_t NumWords) { + BitWord *RawBits = (BitWord *)std::malloc(NumWords * sizeof(BitWord)); + return MutableArrayRef(RawBits, NumWords); + } + int next_unset_in_word(int WordIndex, BitWord Word) const { unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word); return Result < size() ? Result : -1; @@ -700,8 +735,8 @@ class BitVector { void set_unused_bits(bool t = true) { // Set high words first. unsigned UsedWords = NumBitWords(Size); - if (Capacity > UsedWords) - init_words(&Bits[UsedWords], (Capacity-UsedWords), t); + if (Bits.size() > UsedWords) + init_words(Bits.drop_front(UsedWords), t); // Then set any stray high bits of the last used word. unsigned ExtraBits = Size % BITWORD_SIZE; @@ -720,16 +755,17 @@ class BitVector { } void grow(unsigned NewSize) { - Capacity = std::max(NumBitWords(NewSize), Capacity * 2); - assert(Capacity > 0 && "realloc-ing zero space"); - Bits = (BitWord *)std::realloc(Bits, Capacity * sizeof(BitWord)); - + size_t NewCapacity = std::max(NumBitWords(NewSize), Bits.size() * 2); + assert(NewCapacity > 0 && "realloc-ing zero space"); + BitWord *NewBits = + (BitWord *)std::realloc(Bits.data(), NewCapacity * sizeof(BitWord)); + Bits = MutableArrayRef(NewBits, NewCapacity); clear_unused_bits(); } - void init_words(BitWord *B, unsigned NumWords, bool t) { - if (NumWords > 0) - memset(B, 0 - (int)t, NumWords*sizeof(BitWord)); + void init_words(MutableArrayRef B, bool t) { + if (B.size() > 0) + memset(B.data(), 0 - (int)t, B.size() * sizeof(BitWord)); } template @@ -761,7 +797,8 @@ class BitVector { public: /// Return the size (in bytes) of the bit vector. 
- size_t getMemorySize() const { return Capacity * sizeof(BitWord); } + size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); } + size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } }; static inline size_t capacity_in_bytes(const BitVector &X) { diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h index 607e040a606c..bf16af5933f0 100644 --- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h +++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h @@ -117,9 +117,7 @@ class SmallBitVector { } // Return the size. - size_t getSmallSize() const { - return getSmallRawBits() >> SmallNumDataBits; - } + size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } void setSmallSize(size_t Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); @@ -216,6 +214,16 @@ class SmallBitVector { return getPointer()->find_first(); } + int find_last() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (Bits == 0) + return -1; + return NumBaseBits - countLeadingZeros(Bits); + } + return getPointer()->find_last(); + } + /// Returns the index of the first unset bit, -1 if all of the bits are set. int find_first_unset() const { if (isSmall()) { @@ -228,6 +236,17 @@ class SmallBitVector { return getPointer()->find_first_unset(); } + int find_last_unset() const { + if (isSmall()) { + if (count() == getSmallSize()) + return -1; + + uintptr_t Bits = getSmallBits(); + return NumBaseBits - countLeadingOnes(Bits); + } + return getPointer()->find_last_unset(); + } + /// Returns the index of the next set bit following the "Prev" bit. /// Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h index 8214782bfe80..26f11924b771 100644 --- a/contrib/llvm/include/llvm/ADT/StringExtras.h +++ b/contrib/llvm/include/llvm/ADT/StringExtras.h @@ -76,6 +76,36 @@ static inline std::string toHex(StringRef Input) { return Output; } +static inline uint8_t hexFromNibbles(char MSB, char LSB) { + unsigned U1 = hexDigitValue(MSB); + unsigned U2 = hexDigitValue(LSB); + assert(U1 != -1U && U2 != -1U); + + return static_cast((U1 << 4) | U2); +} + +/// Convert hexadecimal string \p Input to its binary representation. +/// The return string is half the size of \p Input. 
+static inline std::string fromHex(StringRef Input) { + if (Input.empty()) + return std::string(); + + std::string Output; + Output.reserve((Input.size() + 1) / 2); + if (Input.size() % 2 == 1) { + Output.push_back(hexFromNibbles('0', Input.front())); + Input = Input.drop_front(); + } + + assert(Input.size() % 2 == 0); + while (!Input.empty()) { + uint8_t Hex = hexFromNibbles(Input[0], Input[1]); + Output.push_back(Hex); + Input = Input.drop_front(2); + } + return Output; +} + static inline std::string utostr(uint64_t X, bool isNeg = false) { char Buffer[21]; char *BufPtr = std::end(Buffer); diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h index e271075b7e2a..e3a8a31ba9bc 100644 --- a/contrib/llvm/include/llvm/ADT/Triple.h +++ b/contrib/llvm/include/llvm/ADT/Triple.h @@ -140,7 +140,8 @@ class Triple { Myriad, AMD, Mesa, - LastVendorType = Mesa + SUSE, + LastVendorType = SUSE }; enum OSType { UnknownOS, diff --git a/contrib/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm/include/llvm/Analysis/DemandedBits.h index c603274a7161..e5fd8a0007fe 100644 --- a/contrib/llvm/include/llvm/Analysis/DemandedBits.h +++ b/contrib/llvm/include/llvm/Analysis/DemandedBits.h @@ -35,6 +35,7 @@ class Function; class Instruction; class DominatorTree; class AssumptionCache; +struct KnownBits; class DemandedBits { public: @@ -58,8 +59,7 @@ class DemandedBits { void determineLiveOperandBits(const Instruction *UserI, const Instruction *I, unsigned OperandNo, const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2); + KnownBits &Known, KnownBits &Known2); bool Analyzed; diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h index b829e995db05..25240dae75e7 100644 --- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -47,7 +47,32 @@ namespace llvm { class Type; class Value; + struct SimplifyQuery { + const DataLayout &DL; + const TargetLibraryInfo *TLI = nullptr; + const DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; + const Instruction *CxtI = nullptr; + SimplifyQuery(const DataLayout &DL) : DL(DL) {} + + SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC = nullptr, + const Instruction *CXTI = nullptr) + : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {} + SimplifyQuery getWithInstruction(Instruction *I) const { + SimplifyQuery Copy(*this); + Copy.CxtI = I; + return Copy; + } + }; + + // NOTE: the explicit multiple argument versions of these functions are + // deprecated. + // Please use the SimplifyQuery versions in new code. + /// Given operands for an Add, fold the result or return null. + Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -56,6 +81,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a Sub, fold the result or return null. 
+ Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -64,6 +91,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FAdd, fold the result or return null. + Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -72,6 +101,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FSub, fold the result or return null. + Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -80,6 +111,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FMul, fold the result or return null. + Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -88,6 +121,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a Mul, fold the result or return null. + Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -95,6 +129,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an SDiv, fold the result or return null. + Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -102,6 +137,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a UDiv, fold the result or return null. + Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -109,6 +145,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FDiv, fold the result or return null. + Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -117,6 +155,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an SRem, fold the result or return null. + Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -124,6 +163,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a URem, fold the result or return null. 
+ Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -131,6 +171,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FRem, fold the result or return null. + Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -139,6 +181,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a Shl, fold the result or return null. + Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q); Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -147,6 +191,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a LShr, fold the result or return null. + Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -155,6 +201,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a AShr, fold the result or return nulll. + Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -163,6 +211,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an And, fold the result or return null. + Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -170,6 +219,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an Or, fold the result or return null. + Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -177,6 +227,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an Xor, fold the result or return null. + Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -184,6 +235,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an ICmpInst, fold the result or return null. + Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -192,6 +245,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an FCmpInst, fold the result or return null. 
+ Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -200,6 +255,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a SelectInst, fold the result or return null. + Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q); Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -207,7 +264,9 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); - /// Given operands for a GetElementPtrInst, fold the result or return null. + /// Given operands for a GetElementPtrInst, fold the result or return null. + Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, + const SimplifyQuery &Q); Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -216,6 +275,9 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an InsertValueInst, fold the result or return null. + Value *SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, + const SimplifyQuery &Q); Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -224,6 +286,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an ExtractValueInst, fold the result or return null. + Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, + const SimplifyQuery &Q); Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -232,6 +296,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for an ExtractElementInst, fold the result or return null. + Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q); Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -240,6 +306,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a CastInst, fold the result or return null. + Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q); Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -248,6 +316,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a ShuffleVectorInst, fold the result or return null. + Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q); Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -259,6 +329,8 @@ namespace llvm { /// Given operands for a CmpInst, fold the result or return null. + Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -267,6 +339,8 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given operands for a BinaryOperator, fold the result or return null. 
+ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q); Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -278,7 +352,9 @@ namespace llvm { /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout &DL, + FastMathFlags FMF, const SimplifyQuery &Q); + Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, @@ -286,6 +362,8 @@ namespace llvm { /// Given a function and iterators over arguments, fold the result or return /// null. + Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q); Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, @@ -294,6 +372,7 @@ namespace llvm { const Instruction *CxtI = nullptr); /// Given a function and set of arguments, fold the result or return null. + Value *SimplifyCall(Value *V, ArrayRef Args, const SimplifyQuery &Q); Value *SimplifyCall(Value *V, ArrayRef Args, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, @@ -302,6 +381,8 @@ namespace llvm { /// See if we can compute a simplified version of this instruction. If not, /// return null. + Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); Value *SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h index 2fad1737d1c0..096df1e421a7 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h @@ -158,7 +158,7 @@ class LoopBase { /// True if terminator in the block can branch to another block that is /// outside of the current loop. bool isLoopExiting(const BlockT *BB) const { - for (const auto Succ : children(BB)) { + for (const auto &Succ : children(BB)) { if (!contains(Succ)) return true; } diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h index 6dc0422ce0e9..66c9f68afc60 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -35,7 +35,7 @@ template void LoopBase:: getExitingBlocks(SmallVectorImpl &ExitingBlocks) const { for (const auto BB : blocks()) - for (const auto Succ : children(BB)) + for (const auto &Succ : children(BB)) if (!contains(Succ)) { // Not in current loop? It must be an exit block. ExitingBlocks.push_back(BB); @@ -61,7 +61,7 @@ template void LoopBase:: getExitBlocks(SmallVectorImpl &ExitBlocks) const { for (const auto BB : blocks()) - for (const auto Succ : children(BB)) + for (const auto &Succ : children(BB)) if (!contains(Succ)) // Not in current loop? It must be an exit block. 
ExitBlocks.push_back(Succ); @@ -83,7 +83,7 @@ template void LoopBase:: getExitEdges(SmallVectorImpl &ExitEdges) const { for (const auto BB : blocks()) - for (const auto Succ : children(BB)) + for (const auto &Succ : children(BB)) if (!contains(Succ)) // Not in current loop? It must be an exit block. ExitEdges.emplace_back(BB, Succ); diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm/include/llvm/Analysis/RegionInfo.h index caeb21db613e..16ee07fa3177 100644 --- a/contrib/llvm/include/llvm/Analysis/RegionInfo.h +++ b/contrib/llvm/include/llvm/Analysis/RegionInfo.h @@ -708,10 +708,24 @@ class RegionInfoBase { /// The top level region. RegionT *TopLevelRegion; -private: /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; +protected: + /// \brief Update refences to a RegionInfoT held by the RegionT managed here + /// + /// This is a post-move helper. Regions hold references to the owning + /// RegionInfo object. After a move these need to be fixed. + template + void updateRegionTree(RegionInfoT &RI, TheRegionT *R) { + if (!R) + return; + R->RI = &RI; + for (auto &SubR : *R) + updateRegionTree(RI, SubR.get()); + } + +private: /// \brief Wipe this region tree's state without releasing any resources. /// /// This is essentially a post-move helper only. It leaves the object in an @@ -879,10 +893,12 @@ class RegionInfo : public RegionInfoBase> { ~RegionInfo() override; - RegionInfo(RegionInfo &&Arg) - : Base(std::move(static_cast(Arg))) {} + RegionInfo(RegionInfo &&Arg) : Base(std::move(static_cast(Arg))) { + updateRegionTree(*this, TopLevelRegion); + } RegionInfo &operator=(RegionInfo &&RHS) { Base::operator=(std::move(static_cast(RHS))); + updateRegionTree(*this, TopLevelRegion); return *this; } diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h index 91aeae0f728f..54bc4dcfd2cd 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -877,6 +877,47 @@ class ScalarEvolution { bool ControlsExit, bool AllowPredicates = false); + // Helper functions for computeExitLimitFromCond to avoid exponential time + // complexity. + + class ExitLimitCache { + // It may look like we need key on the whole (L, TBB, FBB, ControlsExit, + // AllowPredicates) tuple, but recursive calls to + // computeExitLimitFromCondCached from computeExitLimitFromCondImpl only + // vary the in \c ExitCond and \c ControlsExit parameters. We remember the + // initial values of the other values to assert our assumption. 
+ SmallDenseMap, ExitLimit> TripCountMap; + + const Loop *L; + BasicBlock *TBB; + BasicBlock *FBB; + bool AllowPredicates; + + public: + ExitLimitCache(const Loop *L, BasicBlock *TBB, BasicBlock *FBB, + bool AllowPredicates) + : L(L), TBB(TBB), FBB(FBB), AllowPredicates(AllowPredicates) {} + + Optional find(const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, + bool AllowPredicates); + + void insert(const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates, + const ExitLimit &EL); + }; + + typedef ExitLimitCache ExitLimitCacheTy; + ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache, + const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, + bool AllowPredicates); + ExitLimit computeExitLimitFromCondImpl(ExitLimitCacheTy &Cache, const Loop *L, + Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, + bool AllowPredicates); + /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h index e3c2f3bed227..764308dceed9 100644 --- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h +++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h @@ -29,6 +29,7 @@ template class ArrayRef; class DominatorTree; class GEPOperator; class Instruction; + struct KnownBits; class Loop; class LoopInfo; class OptimizationRemarkEmitter; @@ -49,7 +50,7 @@ template class ArrayRef; /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, + void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h index 95c4b4248bbd..a40147336fe2 100644 --- a/contrib/llvm/include/llvm/CodeGen/DIE.h +++ b/contrib/llvm/include/llvm/CodeGen/DIE.h @@ -793,6 +793,9 @@ class DIEUnit { uint32_t Length; /// The length in bytes of all of the DIEs in this unit. const uint16_t Version; /// The Dwarf version number for this unit. const uint8_t AddrSize; /// The size in bytes of an address for this unit. +protected: + ~DIEUnit() = default; + public: DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag); DIEUnit(const DIEUnit &RHS) = delete; @@ -808,6 +811,10 @@ class DIEUnit { this->Section = Section; } + virtual const MCSymbol *getCrossSectionRelativeBaseAddress() const { + return nullptr; + } + /// Return the section that this DIEUnit will be emitted into. /// /// \returns Section pointer which can be NULL. @@ -822,7 +829,11 @@ class DIEUnit { const DIE &getUnitDie() const { return Die; } }; - +struct BasicDIEUnit final : DIEUnit { + BasicDIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag) + : DIEUnit(Version, AddrSize, UnitTag) {} +}; + //===--------------------------------------------------------------------===// /// DIELoc - Represents an expression location. 
// diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 911e8756070b..899563acc330 100644 --- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -18,20 +18,52 @@ #include "llvm/ADT/Optional.h" #include +#include +#include namespace llvm { class MachineInstr; +class MachineInstrBuilder; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; class TargetInstrInfo; class TargetRegisterInfo; +/// Container class for CodeGen predicate results. +/// This is convenient because std::bitset does not have a constructor +/// with an initializer list of set bits. +/// +/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// const unsigned MAX_SUBTARGET_PREDICATES = 192; +/// using PredicateBitset = PredicateBitsetImpl; +/// and updating the constant to suit the target. Tablegen provides a suitable +/// definition for the predicates in use in GenGlobalISel.inc when +/// GET_GLOBALISEL_PREDICATE_BITSET is defined. +template +class PredicateBitsetImpl : public std::bitset { +public: + // Cannot inherit constructors because it's not supported by VC++.. + PredicateBitsetImpl() = default; + + PredicateBitsetImpl(const std::bitset &B) + : std::bitset(B) {} + + PredicateBitsetImpl(std::initializer_list Init) { + for (auto I : Init) + std::bitset::set(I); + } +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: virtual ~InstructionSelector() {} + /// This is executed before selecting a function. + virtual void beginFunction(const MachineFunction &MF) {} + /// Select the (possibly generic) instruction \p I to only use target-specific /// opcodes. It is OK to insert multiple instructions, but they cannot be /// generic pre-isel instructions. @@ -46,6 +78,8 @@ class InstructionSelector { virtual bool select(MachineInstr &I) const = 0; protected: + typedef std::function ComplexRendererFn; + InstructionSelector(); /// Mutate the newly-selected instruction \p I to constrain its (possibly diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h index 81b43126adeb..e16354088296 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h @@ -65,7 +65,6 @@ class MachineOperand { MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel - MO_Placeholder, ///< Placeholder for GlobalISel ComplexPattern result. }; private: @@ -768,11 +767,6 @@ class MachineOperand { return Op; } - static MachineOperand CreatePlaceholder() { - MachineOperand Op(MachineOperand::MO_Placeholder); - return Op; - } - friend class MachineInstr; friend class MachineRegisterInfo; private: diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h index 6f0509543e7d..4bb658898fb5 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -654,6 +654,15 @@ class SelectionDAG { return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } + /// Return an ISD::BUILD_VECTOR node. The number of elements in VT, + /// which must be a vector type, must match the number of operands in Ops. 
+ /// The operands must have the same type as (or, for integers, a type wider + /// than) VT's element type. + SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef Ops) { + // VerifySDNode (via InsertNode) checks BUILD_VECTOR later. + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + /// Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all /// elements. VT must be a vector type. Op's type must be the same as (or, /// for integers, a type wider than) VT's element type. @@ -968,7 +977,7 @@ class SelectionDAG { bool IsExpanding = false); SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool IsTruncating = false, + MachineMemOperand *MMO, bool IsTruncating = false, bool IsCompressing = false); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO); diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h index 2791c9dc3746..e599f8a19e34 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h @@ -546,7 +546,7 @@ enum class TrampolineType : uint16_t { TrampIncremental, BranchIsland }; // These values correspond to the CV_SourceChksum_t enumeration. enum class FileChecksumKind : uint8_t { None, MD5, SHA1, SHA256 }; -enum LineFlags : uint32_t { +enum LineFlags : uint16_t { HaveColumns = 1, // CV_LINES_HAVE_COLUMNS }; } diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h index 1a40654a3f33..31344a9427db 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h @@ -81,7 +81,7 @@ template <> class VarStreamArrayExtractor { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(BlockHeader)) return EC; - bool HasColumn = Header->Flags & LineFlags::HaveColumns; + bool HasColumn = Header->Flags & uint32_t(LineFlags::HaveColumns); uint32_t LineInfoSize = BlockHeader->NumLines * (sizeof(LineNumberEntry) + (HasColumn ? 
sizeof(ColumnNumberEntry) : 0)); diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumperBase.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumperBase.h deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index b2a4d247ccc6..a46d46a5bff3 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -18,9 +18,9 @@ namespace llvm { class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, + bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, UnitSection, Entry) {} diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index f941cdd1060a..d89e2c684cd3 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -50,6 +50,11 @@ class raw_ostream; // entire size of the debug info sections. typedef DenseMap> RelocAddrMap; +/// Reads a value from data extractor and applies a relocation to the result if +/// one exists for the given offset. +uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs); + /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug /// information parsing. 
The actual data is supplied through pure virtual @@ -216,7 +221,7 @@ class DWARFContext : public DIContext { virtual StringRef getEHFrameSection() = 0; virtual const DWARFSection &getLineSection() = 0; virtual StringRef getStringSection() = 0; - virtual StringRef getRangeSection() = 0; + virtual const DWARFSection& getRangeSection() = 0; virtual StringRef getMacinfoSection() = 0; virtual StringRef getPubNamesSection() = 0; virtual StringRef getPubTypesSection() = 0; @@ -231,7 +236,7 @@ class DWARFContext : public DIContext { virtual const DWARFSection &getLocDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; virtual StringRef getStringOffsetDWOSection() = 0; - virtual StringRef getRangeDWOSection() = 0; + virtual const DWARFSection &getRangeDWOSection() = 0; virtual StringRef getAddrSection() = 0; virtual const DWARFSection& getAppleNamesSection() = 0; virtual const DWARFSection& getAppleTypesSection() = 0; @@ -271,7 +276,7 @@ class DWARFContextInMemory : public DWARFContext { StringRef EHFrameSection; DWARFSection LineSection; StringRef StringSection; - StringRef RangeSection; + DWARFSection RangeSection; StringRef MacinfoSection; StringRef PubNamesSection; StringRef PubTypesSection; @@ -286,7 +291,7 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LocDWOSection; StringRef StringDWOSection; StringRef StringOffsetDWOSection; - StringRef RangeDWOSection; + DWARFSection RangeDWOSection; StringRef AddrSection; DWARFSection AppleNamesSection; DWARFSection AppleTypesSection; @@ -319,7 +324,7 @@ class DWARFContextInMemory : public DWARFContext { StringRef getEHFrameSection() override { return EHFrameSection; } const DWARFSection &getLineSection() override { return LineSection; } StringRef getStringSection() override { return StringSection; } - StringRef getRangeSection() override { return RangeSection; } + const DWARFSection &getRangeSection() override { return RangeSection; } StringRef getMacinfoSection() override { return MacinfoSection; } StringRef getPubNamesSection() override { return PubNamesSection; } StringRef getPubTypesSection() override { return PubTypesSection; } @@ -346,7 +351,7 @@ class DWARFContextInMemory : public DWARFContext { return StringOffsetDWOSection; } - StringRef getRangeDWOSection() override { return RangeDWOSection; } + const DWARFSection &getRangeDWOSection() override { return RangeDWOSection; } StringRef getAddrSection() override { return AddrSection; diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 018a049a3ed8..9172df5bfac6 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -11,6 +11,8 @@ #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H #include "llvm/Support/DataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" + #include #include #include @@ -71,7 +73,7 @@ class DWARFDebugRangeList { void clear(); void dump(raw_ostream &OS) const; - bool extract(DataExtractor data, uint32_t *offset_ptr); + bool extract(DataExtractor data, uint32_t *offset_ptr, const RelocAddrMap& Relocs); const std::vector &getEntries() { return Entries; } /// getAbsoluteRanges - Returns absolute address ranges defined by this range diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index 703316005887..c9da2c9a3e16 100644 --- 
a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -30,9 +30,9 @@ class DWARFTypeUnit : public DWARFUnit { public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, bool IsDWO, - const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, + bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, UnitSection, Entry) {} diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 023a0f7b9fb2..e29ba523238c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -56,9 +56,9 @@ class DWARFUnitSectionBase { ~DWARFUnitSectionBase() = default; virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, - bool isLittleEndian, bool isDWO) = 0; + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, + StringRef LS, bool isLittleEndian, bool isDWO) = 0; }; const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, @@ -88,9 +88,9 @@ class DWARFUnitSection final : public SmallVector, 1>, private: void parseImpl(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO) override { + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, + bool LE, bool IsDWO) override { if (Parsed) return; const auto &Index = getDWARFUnitIndex(Context, UnitType::Section); @@ -115,7 +115,7 @@ class DWARFUnit { const DWARFSection &InfoSection; const DWARFDebugAbbrev *Abbrev; - StringRef RangeSection; + const DWARFSection *RangeSection; uint32_t RangeSectionBase; StringRef LineSection; StringRef StringSection; @@ -171,7 +171,7 @@ class DWARFUnit { public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry = nullptr); @@ -192,7 +192,7 @@ class DWARFUnit { // Recursively update address to Die map. 
void updateAddressDieMap(DWARFDie Die); - void setRangesSection(StringRef RS, uint32_t Base) { + void setRangesSection(const DWARFSection *RS, uint32_t Base) { RangeSection = RS; RangeSectionBase = Base; } diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h index c0633cbdfa52..3710eb29e7f9 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h @@ -102,7 +102,8 @@ class DIARawSymbol : public IPDBRawSymbol { uint32_t getVirtualBaseDispIndex() const override; uint32_t getVirtualBaseOffset() const override; uint32_t getVirtualTableShapeId() const override; - std::unique_ptr getVirtualBaseTableType() const override; + std::unique_ptr + getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; PDB_UniqueId getGuid() const override; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h index 4c28e194bc70..fab086c62c72 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h @@ -113,7 +113,7 @@ class IPDBRawSymbol { virtual Variant getValue() const = 0; virtual uint32_t getVirtualBaseDispIndex() const = 0; virtual uint32_t getVirtualBaseOffset() const = 0; - virtual std::unique_ptr + virtual std::unique_ptr getVirtualBaseTableType() const = 0; virtual uint32_t getVirtualTableShapeId() const = 0; virtual PDB_DataKind getDataKind() const = 0; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModStream.h index d65e195dbb95..b12d4ff375f3 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModStream.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModStream.h @@ -40,6 +40,8 @@ class ModStream { iterator_range lines(bool *HadError) const; + bool hasLineInfo() const; + Error commit(); private: diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h index cffb5d09d225..e1e78035ff38 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h @@ -101,7 +101,8 @@ class NativeRawSymbol : public IPDBRawSymbol { uint32_t getVirtualBaseDispIndex() const override; uint32_t getVirtualBaseOffset() const override; uint32_t getVirtualTableShapeId() const override; - std::unique_ptr getVirtualBaseTableType() const override; + std::unique_ptr + getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; PDB_UniqueId getGuid() const override; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h b/contrib/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h index e3dcba50bd1a..6bc3660fbe51 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include #include @@ -32,40 +33,63 @@ class PDBSymbolTypeVTable; class ClassLayout; class BaseClassLayout; -class StorageItemBase; +class LayoutItemBase; class UDTLayoutBase; -class StorageItemBase { +class LayoutItemBase { public: - StorageItemBase(const UDTLayoutBase &Parent, const PDBSymbol &Symbol, - const 
std::string &Name, uint32_t OffsetInParent, - uint32_t Size); - virtual ~StorageItemBase() {} + LayoutItemBase(const UDTLayoutBase *Parent, const PDBSymbol *Symbol, + const std::string &Name, uint32_t OffsetInParent, + uint32_t Size, bool IsElided); + virtual ~LayoutItemBase() {} - virtual uint32_t deepPaddingSize() const; + uint32_t deepPaddingSize() const; + virtual uint32_t immediatePadding() const { return 0; } + virtual uint32_t tailPadding() const; - const UDTLayoutBase &getParent() const { return Parent; } + const UDTLayoutBase *getParent() const { return Parent; } StringRef getName() const { return Name; } uint32_t getOffsetInParent() const { return OffsetInParent; } uint32_t getSize() const { return SizeOf; } - const PDBSymbol &getSymbol() const { return Symbol; } + uint32_t getLayoutSize() const { return LayoutSize; } + const PDBSymbol *getSymbol() const { return Symbol; } + const BitVector &usedBytes() const { return UsedBytes; } + bool isElided() const { return IsElided; } + virtual bool isVBPtr() const { return false; } + + uint32_t containsOffset(uint32_t Off) const { + uint32_t Begin = getOffsetInParent(); + uint32_t End = Begin + getSize(); + return (Off >= Begin && Off < End); + } protected: - const UDTLayoutBase &Parent; - const PDBSymbol &Symbol; + const PDBSymbol *Symbol = nullptr; + const UDTLayoutBase *Parent = nullptr; BitVector UsedBytes; std::string Name; uint32_t OffsetInParent = 0; uint32_t SizeOf = 0; + uint32_t LayoutSize = 0; + bool IsElided = false; }; -class DataMemberLayoutItem : public StorageItemBase { +class VBPtrLayoutItem : public LayoutItemBase { +public: + VBPtrLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr Sym, uint32_t Offset, + uint32_t Size); + virtual bool isVBPtr() const { return true; } + +private: + std::unique_ptr Type; +}; + +class DataMemberLayoutItem : public LayoutItemBase { public: DataMemberLayoutItem(const UDTLayoutBase &Parent, std::unique_ptr DataMember); - virtual uint32_t deepPaddingSize() const; - const PDBSymbolData &getDataMember(); bool hasUDTLayout() const; const ClassLayout &getUDTLayout() const; @@ -75,77 +99,73 @@ class DataMemberLayoutItem : public StorageItemBase { std::unique_ptr UdtLayout; }; -class VTableLayoutItem : public StorageItemBase { +class VTableLayoutItem : public LayoutItemBase { public: VTableLayoutItem(const UDTLayoutBase &Parent, std::unique_ptr VTable); - ArrayRef funcs() const { return VTableFuncs; } uint32_t getElementSize() const { return ElementSize; } - void setFunction(uint32_t Index, PDBSymbolFunc &Func) { - VTableFuncs[Index] = &Func; - } - private: uint32_t ElementSize = 0; - std::unique_ptr Shape; std::unique_ptr VTable; - std::vector VTableFuncs; }; -class UDTLayoutBase { +class UDTLayoutBase : public LayoutItemBase { template using UniquePtrVector = std::vector>; public: - UDTLayoutBase(const PDBSymbol &Symbol, const std::string &Name, - uint32_t Size); + UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym, + const std::string &Name, uint32_t OffsetInParent, uint32_t Size, + bool IsElided); - uint32_t shallowPaddingSize() const; - uint32_t deepPaddingSize() const; + uint32_t tailPadding() const override; - const BitVector &usedBytes() const { return UsedBytes; } + ArrayRef layout_items() const { return LayoutItems; } - uint32_t getClassSize() const { return SizeOf; } + ArrayRef bases() const { return AllBases; } + ArrayRef regular_bases() const { return NonVirtualBases; } + ArrayRef virtual_bases() const { return VirtualBases; } - ArrayRef> layout_items() const { - 
return ChildStorage; - } - - VTableLayoutItem *findVTableAtOffset(uint32_t RelativeOffset); - - StringRef getUDTName() const { return Name; } - - ArrayRef bases() const { return BaseClasses; } - ArrayRef> vbases() const { - return VirtualBases; - } + uint32_t directVirtualBaseCount() const { return DirectVBaseCount; } ArrayRef> funcs() const { return Funcs; } ArrayRef> other_items() const { return Other; } - const PDBSymbol &getSymbolBase() const { return SymbolBase; } - protected: + bool hasVBPtrAtOffset(uint32_t Off) const; void initializeChildren(const PDBSymbol &Sym); - void addChildToLayout(std::unique_ptr Child); - void addVirtualOverride(PDBSymbolFunc &Func); - void addVirtualIntro(PDBSymbolFunc &Func); + void addChildToLayout(std::unique_ptr Child); - const PDBSymbol &SymbolBase; - std::string Name; - uint32_t SizeOf = 0; + uint32_t DirectVBaseCount = 0; - BitVector UsedBytes; UniquePtrVector Other; UniquePtrVector Funcs; - UniquePtrVector VirtualBases; - UniquePtrVector ChildStorage; - std::vector> ChildrenPerByte; - std::vector BaseClasses; + UniquePtrVector ChildStorage; + std::vector LayoutItems; + + std::vector AllBases; + ArrayRef NonVirtualBases; + ArrayRef VirtualBases; + VTableLayoutItem *VTable = nullptr; + VBPtrLayoutItem *VBPtr = nullptr; +}; + +class BaseClassLayout : public UDTLayoutBase { +public: + BaseClassLayout(const UDTLayoutBase &Parent, uint32_t OffsetInParent, + bool Elide, std::unique_ptr Base); + + const PDBSymbolTypeBaseClass &getBase() const { return *Base; } + bool isVirtualBase() const { return IsVirtualBase; } + bool isEmptyBase() { return SizeOf == 1 && LayoutSize == 0; } + +private: + std::unique_ptr Base; + bool IsVirtualBase; }; class ClassLayout : public UDTLayoutBase { @@ -156,24 +176,13 @@ class ClassLayout : public UDTLayoutBase { ClassLayout(ClassLayout &&Other) = default; const PDBSymbolTypeUDT &getClass() const { return UDT; } + uint32_t immediatePadding() const override; private: + BitVector ImmediateUsedBytes; std::unique_ptr OwnedStorage; const PDBSymbolTypeUDT &UDT; }; - -class BaseClassLayout : public UDTLayoutBase, public StorageItemBase { -public: - BaseClassLayout(const UDTLayoutBase &Parent, - std::unique_ptr Base); - - const PDBSymbolTypeBaseClass &getBase() const { return *Base; } - bool isVirtualBase() const { return IsVirtualBase; } - -private: - std::unique_ptr Base; - bool IsVirtualBase; -}; } } // namespace llvm diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 84a037b2f998..a3be242b4457 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -348,7 +348,7 @@ class SerializationTraits { // key of the deserializers map to save us from duplicating the string in // the serializer. This should be changed to use a stringpool if we switch // to a map type that may move keys in memory. - std::lock_guard Lock(DeserializersMutex); + std::lock_guard Lock(DeserializersMutex); auto I = Deserializers.insert(Deserializers.begin(), std::make_pair(std::move(Name), @@ -358,7 +358,7 @@ class SerializationTraits { { assert(KeyName != nullptr && "No keyname pointer"); - std::lock_guard Lock(SerializersMutex); + std::lock_guard Lock(SerializersMutex); // FIXME: Move capture Serialize once we have C++14. 
Serializers[ErrorInfoT::classID()] = [KeyName, Serialize](ChannelT &C, const ErrorInfoBase &EIB) -> Error { @@ -372,7 +372,8 @@ class SerializationTraits { } static Error serialize(ChannelT &C, Error &&Err) { - std::lock_guard Lock(SerializersMutex); + std::lock_guard Lock(SerializersMutex); + if (!Err) return serializeSeq(C, std::string()); @@ -386,7 +387,7 @@ class SerializationTraits { } static Error deserialize(ChannelT &C, Error &Err) { - std::lock_guard Lock(DeserializersMutex); + std::lock_guard Lock(DeserializersMutex); std::string Key; if (auto Err = deserializeSeq(C, Key)) @@ -406,8 +407,6 @@ class SerializationTraits { private: static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) { - assert(EIB.dynamicClassID() != StringError::classID() && - "StringError serialization not registered"); std::string ErrMsg; { raw_string_ostream ErrMsgStream(ErrMsg); @@ -417,17 +416,17 @@ class SerializationTraits { inconvertibleErrorCode())); } - static std::mutex SerializersMutex; - static std::mutex DeserializersMutex; + static std::recursive_mutex SerializersMutex; + static std::recursive_mutex DeserializersMutex; static std::map Serializers; static std::map Deserializers; }; template -std::mutex SerializationTraits::SerializersMutex; +std::recursive_mutex SerializationTraits::SerializersMutex; template -std::mutex SerializationTraits::DeserializersMutex; +std::recursive_mutex SerializationTraits::DeserializersMutex; template std::map::WrappedErrorDeserializer> SerializationTraits::Deserializers; +/// Registers a serializer and deserializer for the given error type on the +/// given channel type. +template +void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize, + DeserializeFtor &&Deserialize) { + SerializationTraits::template registerErrorType( + std::move(Name), + std::forward(Serialize), + std::forward(Deserialize)); +} + +/// Registers serialization/deserialization for StringError. template void registerStringError() { static bool AlreadyRegistered = false; if (!AlreadyRegistered) { - SerializationTraits:: - template registerErrorType( - "StringError", - [](ChannelT &C, const StringError &SE) { - return serializeSeq(C, SE.getMessage()); - }, - [](ChannelT &C, Error &Err) { - ErrorAsOutParameter EAO(&Err); - std::string Msg; - if (auto E2 = deserializeSeq(C, Msg)) - return E2; - Err = - make_error(std::move(Msg), - orcError( - OrcErrorCode::UnknownErrorCodeFromRemote)); - return Error::success(); - }); + registerErrorSerialization( + "StringError", + [](ChannelT &C, const StringError &SE) { + return serializeSeq(C, SE.getMessage()); + }, + [](ChannelT &C, Error &Err) -> Error { + ErrorAsOutParameter EAO(&Err); + std::string Msg; + if (auto E2 = deserializeSeq(C, Msg)) + return E2; + Err = + make_error(std::move(Msg), + orcError( + OrcErrorCode::UnknownErrorCodeFromRemote)); + return Error::success(); + }); AlreadyRegistered = true; } } diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h index b13f197d25fd..e2cd4c236fcc 100644 --- a/contrib/llvm/include/llvm/IR/Attributes.h +++ b/contrib/llvm/include/llvm/IR/Attributes.h @@ -509,7 +509,7 @@ class AttributeList { unsigned getSlotIndex(unsigned Slot) const; /// \brief Return the attributes at the given slot. 
- AttributeList getSlotAttributes(unsigned Slot) const; + AttributeSet getSlotAttributes(unsigned Slot) const; void dump() const; }; diff --git a/contrib/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm/include/llvm/IR/ConstantRange.h index 47004e82cc19..fd7f96abb19e 100644 --- a/contrib/llvm/include/llvm/IR/ConstantRange.h +++ b/contrib/llvm/include/llvm/IR/ConstantRange.h @@ -93,7 +93,7 @@ class ConstantRange { /// /// NB! The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to contrain X's range. E.g. in the last + /// missing, so you cannot use this to constrain X's range. E.g. in the last /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) /// is not in the set returned. /// diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h index cae03d33a7ee..8f6c85f53efc 100644 --- a/contrib/llvm/include/llvm/IR/Dominators.h +++ b/contrib/llvm/include/llvm/IR/Dominators.h @@ -157,6 +157,10 @@ class DominatorTree : public DominatorTreeBase { /// This should only be used for debugging as it aborts the program if the /// verification fails. void verifyDomTree() const; + + // Pop up a GraphViz/gv window with the Dominator Tree rendered using `dot`. + void viewGraph(const Twine &Name, const Twine &Title); + void viewGraph(); }; //===------------------------------------- diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 5415c6b0d151..21d8a15e7e7a 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -629,6 +629,8 @@ def int_amdgcn_readfirstlane : GCCBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +// The lane argument must be uniform across the currently active threads of the +// current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : GCCBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h index 70c57cf90add..67c35cd22b34 100644 --- a/contrib/llvm/include/llvm/IR/Module.h +++ b/contrib/llvm/include/llvm/IR/Module.h @@ -319,7 +319,7 @@ class Module { /// exist, add a prototype for the function and return it. This function /// guarantees to return a constant of pointer to the specified function type /// or a ConstantExpr BitCast of that type if the named function has a - /// different type. This version of the method takes a null terminated list of + /// different type. This version of the method takes a list of /// function arguments, which makes it easier for clients to use. template Constant *getOrInsertFunction(StringRef Name, diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h index a4b48d7f3539..00f821399257 100644 --- a/contrib/llvm/include/llvm/IR/Value.h +++ b/contrib/llvm/include/llvm/IR/Value.h @@ -482,6 +482,17 @@ class Value { static_cast(this)->stripPointerCasts()); } + /// \brief Strip off pointer casts, all-zero GEPs, aliases and barriers. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. This function should be used only in + /// Alias analysis. 
+ const Value *stripPointerCastsAndBarriers() const; + Value *stripPointerCastsAndBarriers() { + return const_cast( + static_cast(this)->stripPointerCastsAndBarriers()); + } + /// \brief Strip off pointer casts and all-zero GEPs. /// /// Returns the original uncasted value. If this is called on a non-pointer diff --git a/contrib/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm/include/llvm/MC/MCTargetOptions.h index 06f58d498030..ab027ab27a41 100644 --- a/contrib/llvm/include/llvm/MC/MCTargetOptions.h +++ b/contrib/llvm/include/llvm/MC/MCTargetOptions.h @@ -54,6 +54,7 @@ class MCTargetOptions { int DwarfVersion = 0; std::string ABIName; + std::string SplitDwarfFile; /// Additional paths to search for `.include` directives when using the /// integrated assembler. diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h index 7a3155b3953e..9c72bd4023d8 100644 --- a/contrib/llvm/include/llvm/Object/ELF.h +++ b/contrib/llvm/include/llvm/Object/ELF.h @@ -14,9 +14,19 @@ #ifndef LLVM_OBJECT_ELF_H #define LLVM_OBJECT_ELF_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/ELFTypes.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include +#include namespace llvm { namespace object { @@ -41,27 +51,27 @@ template class ELFFile { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Ehdr Elf_Ehdr; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Dyn Elf_Dyn; - typedef typename ELFT::Phdr Elf_Phdr; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::Verdef Elf_Verdef; - typedef typename ELFT::Verdaux Elf_Verdaux; - typedef typename ELFT::Verneed Elf_Verneed; - typedef typename ELFT::Vernaux Elf_Vernaux; - typedef typename ELFT::Versym Elf_Versym; - typedef typename ELFT::Hash Elf_Hash; - typedef typename ELFT::GnuHash Elf_GnuHash; - typedef typename ELFT::DynRange Elf_Dyn_Range; - typedef typename ELFT::ShdrRange Elf_Shdr_Range; - typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::RelRange Elf_Rel_Range; - typedef typename ELFT::RelaRange Elf_Rela_Range; - typedef typename ELFT::PhdrRange Elf_Phdr_Range; + using uintX_t = typename ELFT::uint; + using Elf_Ehdr = typename ELFT::Ehdr; + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Sym = typename ELFT::Sym; + using Elf_Dyn = typename ELFT::Dyn; + using Elf_Phdr = typename ELFT::Phdr; + using Elf_Rel = typename ELFT::Rel; + using Elf_Rela = typename ELFT::Rela; + using Elf_Verdef = typename ELFT::Verdef; + using Elf_Verdaux = typename ELFT::Verdaux; + using Elf_Verneed = typename ELFT::Verneed; + using Elf_Vernaux = typename ELFT::Vernaux; + using Elf_Versym = typename ELFT::Versym; + using Elf_Hash = typename ELFT::Hash; + using Elf_GnuHash = typename ELFT::GnuHash; + using Elf_Dyn_Range = typename ELFT::DynRange; + using Elf_Shdr_Range = typename ELFT::ShdrRange; + using Elf_Sym_Range = typename ELFT::SymRange; + using Elf_Rel_Range = typename ELFT::RelRange; + using Elf_Rela_Range = typename ELFT::RelaRange; + using Elf_Phdr_Range = typename ELFT::PhdrRange; const uint8_t *base() const { return reinterpret_cast(Buf.data()); @@ -70,7 +80,6 @@ class ELFFile { size_t getBufSize() const { return Buf.size(); } 
private: - StringRef Buf; public: @@ -161,10 +170,10 @@ class ELFFile { Expected> getSectionContents(const Elf_Shdr *Sec) const; }; -typedef ELFFile> ELF32LEFile; -typedef ELFFile> ELF64LEFile; -typedef ELFFile> ELF32BEFile; -typedef ELFFile> ELF64BEFile; +using ELF32LEFile = ELFFile>; +using ELF64LEFile = ELFFile>; +using ELF32BEFile = ELFFile>; +using ELF64BEFile = ELFFile>; template inline Expected @@ -194,7 +203,7 @@ ELFFile::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const { uint32_t Index = Sym->st_shndx; if (Index == ELF::SHN_XINDEX) { - auto ErrorOrIndex = object::getExtendedSymbolTableIndex( + auto ErrorOrIndex = getExtendedSymbolTableIndex( Sym, Syms.begin(), ShndxTable); if (!ErrorOrIndex) return ErrorOrIndex.takeError(); @@ -519,7 +528,8 @@ inline unsigned hashSysV(StringRef SymbolName) { } return h; } + } // end namespace object } // end namespace llvm -#endif +#endif // LLVM_OBJECT_ELF_H diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h index 9e95f2958aa4..d8b58b8079fa 100644 --- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h @@ -27,6 +27,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" @@ -42,13 +43,11 @@ namespace llvm { namespace object { class elf_symbol_iterator; -class ELFSymbolRef; -class ELFRelocationRef; class ELFObjectFileBase : public ObjectFile { - friend class ELFSymbolRef; - friend class ELFSectionRef; friend class ELFRelocationRef; + friend class ELFSectionRef; + friend class ELFSymbolRef; protected: ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source); @@ -65,7 +64,8 @@ class ELFObjectFileBase : public ObjectFile { virtual ErrorOr getRelocationAddend(DataRefImpl Rel) const = 0; public: - typedef iterator_range elf_symbol_iterator_range; + using elf_symbol_iterator_range = iterator_range; + virtual elf_symbol_iterator_range getDynamicSymbolIterators() const = 0; elf_symbol_iterator_range symbols() const; @@ -201,14 +201,14 @@ template class ELFObjectFile : public ELFObjectFileBase { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef typename ELFFile::uintX_t uintX_t; + using uintX_t = typename ELFFile::uintX_t; - typedef typename ELFFile::Elf_Sym Elf_Sym; - typedef typename ELFFile::Elf_Shdr Elf_Shdr; - typedef typename ELFFile::Elf_Ehdr Elf_Ehdr; - typedef typename ELFFile::Elf_Rel Elf_Rel; - typedef typename ELFFile::Elf_Rela Elf_Rela; - typedef typename ELFFile::Elf_Dyn Elf_Dyn; + using Elf_Sym = typename ELFFile::Elf_Sym; + using Elf_Shdr = typename ELFFile::Elf_Shdr; + using Elf_Ehdr = typename ELFFile::Elf_Ehdr; + using Elf_Rel = typename ELFFile::Elf_Rel; + using Elf_Rela = typename ELFFile::Elf_Rela; + using Elf_Dyn = typename ELFFile::Elf_Dyn; protected: ELFFile EF; @@ -398,10 +398,10 @@ template class ELFObjectFile : public ELFObjectFileBase { bool isRelocatableObject() const override; }; -typedef ELFObjectFile> ELF32LEObjectFile; -typedef ELFObjectFile> ELF64LEObjectFile; -typedef ELFObjectFile> ELF32BEObjectFile; -typedef ELFObjectFile> ELF64BEObjectFile; +using ELF32LEObjectFile = ELFObjectFile>; +using ELF64LEObjectFile = ELFObjectFile>; +using ELF32BEObjectFile = ELFObjectFile>; +using ELF64BEObjectFile = ELFObjectFile>; template void 
ELFObjectFile::moveSymbolNext(DataRefImpl &Sym) const { diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h index 3e03fd8b980e..99346fe1a882 100644 --- a/contrib/llvm/include/llvm/Object/ELFTypes.h +++ b/contrib/llvm/include/llvm/Object/ELFTypes.h @@ -11,10 +11,15 @@ #define LLVM_OBJECT_ELFTYPES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/Error.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include namespace llvm { namespace object { @@ -45,58 +50,58 @@ template struct ELFType { static const endianness TargetEndianness = E; static const bool Is64Bits = Is64; - typedef typename std::conditional::type uint; - typedef Elf_Ehdr_Impl> Ehdr; - typedef Elf_Shdr_Impl> Shdr; - typedef Elf_Sym_Impl> Sym; - typedef Elf_Dyn_Impl> Dyn; - typedef Elf_Phdr_Impl> Phdr; - typedef Elf_Rel_Impl, false> Rel; - typedef Elf_Rel_Impl, true> Rela; - typedef Elf_Verdef_Impl> Verdef; - typedef Elf_Verdaux_Impl> Verdaux; - typedef Elf_Verneed_Impl> Verneed; - typedef Elf_Vernaux_Impl> Vernaux; - typedef Elf_Versym_Impl> Versym; - typedef Elf_Hash_Impl> Hash; - typedef Elf_GnuHash_Impl> GnuHash; - typedef Elf_Chdr_Impl> Chdr; - typedef ArrayRef DynRange; - typedef ArrayRef ShdrRange; - typedef ArrayRef SymRange; - typedef ArrayRef RelRange; - typedef ArrayRef RelaRange; - typedef ArrayRef PhdrRange; + using uint = typename std::conditional::type; + using Ehdr = Elf_Ehdr_Impl>; + using Shdr = Elf_Shdr_Impl>; + using Sym = Elf_Sym_Impl>; + using Dyn = Elf_Dyn_Impl>; + using Phdr = Elf_Phdr_Impl>; + using Rel = Elf_Rel_Impl, false>; + using Rela = Elf_Rel_Impl, true>; + using Verdef = Elf_Verdef_Impl>; + using Verdaux = Elf_Verdaux_Impl>; + using Verneed = Elf_Verneed_Impl>; + using Vernaux = Elf_Vernaux_Impl>; + using Versym = Elf_Versym_Impl>; + using Hash = Elf_Hash_Impl>; + using GnuHash = Elf_GnuHash_Impl>; + using Chdr = Elf_Chdr_Impl>; + using DynRange = ArrayRef; + using ShdrRange = ArrayRef; + using SymRange = ArrayRef; + using RelRange = ArrayRef; + using RelaRange = ArrayRef; + using PhdrRange = ArrayRef; - typedef packed Half; - typedef packed Word; - typedef packed Sword; - typedef packed Xword; - typedef packed Sxword; - typedef packed Addr; - typedef packed Off; + using Half = packed; + using Word = packed; + using Sword = packed; + using Xword = packed; + using Sxword = packed; + using Addr = packed; + using Off = packed; }; -typedef ELFType ELF32LE; -typedef ELFType ELF32BE; -typedef ELFType ELF64LE; -typedef ELFType ELF64BE; +using ELF32LE = ELFType; +using ELF32BE = ELFType; +using ELF64LE = ELFType; +using ELF64BE = ELFType; // Use an alignment of 2 for the typedefs since that is the worst case for // ELF files in archives. // Templates to choose Elf_Addr and Elf_Off depending on is64Bits. 
template struct ELFDataTypeTypedefHelperCommon { - typedef support::detail::packed_endian_specific_integral< - uint16_t, target_endianness, 2> Elf_Half; - typedef support::detail::packed_endian_specific_integral< - uint32_t, target_endianness, 2> Elf_Word; - typedef support::detail::packed_endian_specific_integral< - int32_t, target_endianness, 2> Elf_Sword; - typedef support::detail::packed_endian_specific_integral< - uint64_t, target_endianness, 2> Elf_Xword; - typedef support::detail::packed_endian_specific_integral< - int64_t, target_endianness, 2> Elf_Sxword; + using Elf_Half = support::detail::packed_endian_specific_integral< + uint16_t, target_endianness, 2>; + using Elf_Word = support::detail::packed_endian_specific_integral< + uint32_t, target_endianness, 2>; + using Elf_Sword = support::detail::packed_endian_specific_integral< + int32_t, target_endianness, 2>; + using Elf_Xword = support::detail::packed_endian_specific_integral< + uint64_t, target_endianness, 2>; + using Elf_Sxword = support::detail::packed_endian_specific_integral< + int64_t, target_endianness, 2>; }; template struct ELFDataTypeTypedefHelper; @@ -105,34 +110,34 @@ template struct ELFDataTypeTypedefHelper; template struct ELFDataTypeTypedefHelper> : ELFDataTypeTypedefHelperCommon { - typedef uint32_t value_type; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Addr; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Off; + using value_type = uint32_t; + using Elf_Addr = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; + using Elf_Off = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; }; /// ELF 64bit types. template struct ELFDataTypeTypedefHelper> : ELFDataTypeTypedefHelperCommon { - typedef uint64_t value_type; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Addr; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Off; + using value_type = uint64_t; + using Elf_Addr = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; + using Elf_Off = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; }; // I really don't like doing this, but the alternative is copypasta. 
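// The hunks in these ELF headers mechanically replace 'typedef' declarations
// with C++11 alias declarations. A minimal, illustrative equivalence (names
// simplified, not quoted verbatim from the headers):
//
//   typedef typename ELFT::Shdr Elf_Shdr;     // old spelling
//   using Elf_Shdr = typename ELFT::Shdr;     // new spelling, same meaning
//
// Alias declarations read left to right and, unlike typedefs, can themselves
// be templated:
//
//   template <class ELFT>
//   using Elf_Shdr_Range = ArrayRef<typename ELFT::Shdr>;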
#define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) \ - typedef typename ELFT::Addr Elf_Addr; \ - typedef typename ELFT::Off Elf_Off; \ - typedef typename ELFT::Half Elf_Half; \ - typedef typename ELFT::Word Elf_Word; \ - typedef typename ELFT::Sword Elf_Sword; \ - typedef typename ELFT::Xword Elf_Xword; \ - typedef typename ELFT::Sxword Elf_Sxword; + using Elf_Addr = typename ELFT::Addr; \ + using Elf_Off = typename ELFT::Off; \ + using Elf_Half = typename ELFT::Half; \ + using Elf_Word = typename ELFT::Word; \ + using Elf_Sword = typename ELFT::Sword; \ + using Elf_Xword = typename ELFT::Xword; \ + using Elf_Sxword = typename ELFT::Sxword; #define LLD_ELF_COMMA , #define LLVM_ELF_IMPORT_TYPES(E, W) \ @@ -222,6 +227,7 @@ struct Elf_Sym_Impl : Elf_Sym_Base { uint64_t getValue() const { return st_value; } void setBinding(unsigned char b) { setBindingAndType(b, getType()); } void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { st_info = (b << 4) + (t & 0x0f); } @@ -238,22 +244,29 @@ struct Elf_Sym_Impl : Elf_Sym_Base { } bool isAbsolute() const { return st_shndx == ELF::SHN_ABS; } + bool isCommon() const { return getType() == ELF::STT_COMMON || st_shndx == ELF::SHN_COMMON; } + bool isDefined() const { return !isUndefined(); } + bool isProcessorSpecific() const { return st_shndx >= ELF::SHN_LOPROC && st_shndx <= ELF::SHN_HIPROC; } + bool isOSSpecific() const { return st_shndx >= ELF::SHN_LOOS && st_shndx <= ELF::SHN_HIOS; } + bool isReserved() const { // ELF::SHN_HIRESERVE is 0xffff so st_shndx <= ELF::SHN_HIRESERVE is always // true and some compilers warn about it. return st_shndx >= ELF::SHN_LORESERVE; } + bool isUndefined() const { return st_shndx == ELF::SHN_UNDEF; } + bool isExternal() const { return getBinding() != ELF::STB_LOCAL; } @@ -277,14 +290,12 @@ struct Elf_Versym_Impl { Elf_Half vs_index; // Version index with flags (e.g. VERSYM_HIDDEN) }; -template struct Elf_Verdaux_Impl; - /// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section /// (.gnu.version_d). This structure is identical for ELF32 and ELF64. template struct Elf_Verdef_Impl { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef Elf_Verdaux_Impl Elf_Verdaux; + using Elf_Verdaux = Elf_Verdaux_Impl; Elf_Half vd_version; // Version of this structure (e.g. 
VER_DEF_CURRENT) Elf_Half vd_flags; // Bitwise flags (VER_DEF_*) Elf_Half vd_ndx; // Version index, used in .gnu.version entries @@ -361,10 +372,10 @@ template struct Elf_Dyn_Impl : Elf_Dyn_Base { using Elf_Dyn_Base::d_tag; using Elf_Dyn_Base::d_un; - typedef typename std::conditional::type intX_t; - typedef typename std::conditional::type uintX_t; + using intX_t = typename std::conditional::type; + using uintX_t = typename std::conditional::type; intX_t getTag() const { return d_tag; } uintX_t getVal() const { return d_un.d_val; } uintX_t getPtr() const { return d_un.d_ptr; } @@ -430,6 +441,7 @@ struct Elf_Rel_Impl, false> { return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) | ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff); } + void setRInfo(uint64_t R, bool IsMips64EL) { if (IsMips64EL) r_info = (R >> 32) | ((R & 0xff000000) << 8) | ((R & 0x00ff0000) << 24) | @@ -483,15 +495,15 @@ struct Elf_Ehdr_Impl { Elf_Half e_shnum; // Number of entries in the section header table Elf_Half e_shstrndx; // Section header table index of section name // string table + bool checkMagic() const { return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0; } + unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; } unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } }; -template struct Elf_Phdr_Impl; - template struct Elf_Phdr_Impl> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, false) @@ -582,7 +594,7 @@ struct Elf_Chdr_Impl> { template struct Elf_Mips_RegInfo; -template +template struct Elf_Mips_RegInfo> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, false) Elf_Word ri_gprmask; // bit-mask of used general registers @@ -590,7 +602,7 @@ struct Elf_Mips_RegInfo> { Elf_Addr ri_gp_value; // gp register value }; -template +template struct Elf_Mips_RegInfo> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, true) Elf_Word ri_gprmask; // bit-mask of used general registers @@ -609,7 +621,7 @@ template struct Elf_Mips_Options { Elf_Word info; // Kind-specific information Elf_Mips_RegInfo &getRegInfo() { - assert(kind == llvm::ELF::ODK_REGINFO); + assert(kind == ELF::ODK_REGINFO); return *reinterpret_cast *>( (uint8_t *)this + sizeof(Elf_Mips_Options)); } @@ -637,4 +649,4 @@ template struct Elf_Mips_ABIFlags { } // end namespace object. } // end namespace llvm. -#endif +#endif // LLVM_OBJECT_ELFTYPES_H diff --git a/contrib/llvm/include/llvm/Object/IRSymtab.h b/contrib/llvm/include/llvm/Object/IRSymtab.h index be0f02aa7f17..b425543bf637 100644 --- a/contrib/llvm/include/llvm/Object/IRSymtab.h +++ b/contrib/llvm/include/llvm/Object/IRSymtab.h @@ -25,23 +25,31 @@ #define LLVM_OBJECT_IRSYMTAB_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include namespace llvm { namespace irsymtab { + namespace storage { // The data structures in this namespace define the low-level serialization // format. Clients that just want to read a symbol table should use the // irsymtab::Reader class. -typedef support::ulittle32_t Word; +using Word = support::ulittle32_t; /// A reference to a string in the string table. struct Str { Word Offset, Size; + StringRef get(StringRef Strtab) const { return {Strtab.data() + Offset, Size}; } @@ -50,6 +58,7 @@ struct Str { /// A reference to a range of objects in the symbol table. 
template struct Range { Word Offset, Size; + ArrayRef get(StringRef Symtab) const { return {reinterpret_cast(Symtab.data() + Offset), Size}; } @@ -122,7 +131,7 @@ struct Header { Str COFFLinkerOpts; }; -} +} // end namespace storage /// Fills in Symtab and Strtab with a valid symbol and string table for Mods. Error build(ArrayRef Mods, SmallVector &Symtab, @@ -152,18 +161,22 @@ struct Symbol { int getComdatIndex() const { return ComdatIndex; } using S = storage::Symbol; + GlobalValue::VisibilityTypes getVisibility() const { return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); } + bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } bool isWeak() const { return (Flags >> S::FB_weak) & 1; } bool isCommon() const { return (Flags >> S::FB_common) & 1; } bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } bool isUsed() const { return (Flags >> S::FB_used) & 1; } bool isTLS() const { return (Flags >> S::FB_tls) & 1; } + bool canBeOmittedFromSymbolTable() const { return (Flags >> S::FB_may_omit) & 1; } + bool isGlobal() const { return (Flags >> S::FB_global) & 1; } bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } @@ -173,6 +186,7 @@ struct Symbol { assert(isCommon()); return CommonSize; } + uint32_t getCommonAlignment() const { assert(isCommon()); return CommonAlign; @@ -197,9 +211,11 @@ class Reader { ArrayRef Uncommons; StringRef str(storage::Str S) const { return S.get(Strtab); } + template ArrayRef range(storage::Range R) const { return R.get(Symtab); } + const storage::Header &header() const { return *reinterpret_cast(Symtab.data()); } @@ -215,7 +231,7 @@ class Reader { Uncommons = range(header().Uncommons); } - typedef iterator_range> symbol_range; + using symbol_range = iterator_range>; /// Returns the symbol table for the entire bitcode file. /// The symbols enumerated by this method are ephemeral, but they can be @@ -298,8 +314,7 @@ inline Reader::symbol_range Reader::module_symbols(unsigned I) const { SymbolRef(MEnd, MEnd, nullptr, this)}; } -} +} // end namespace irsymtab +} // end namespace llvm -} - -#endif +#endif // LLVM_OBJECT_IRSYMTAB_H diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h index 1ee571cce738..29553558f72f 100644 --- a/contrib/llvm/include/llvm/Object/MachO.h +++ b/contrib/llvm/include/llvm/Object/MachO.h @@ -16,10 +16,25 @@ #define LLVM_OBJECT_MACHO_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include namespace llvm { namespace object { @@ -28,11 +43,10 @@ namespace object { /// data in code entry in the table in a Mach-O object file. 
class DiceRef { DataRefImpl DicePimpl; - const ObjectFile *OwningObject; + const ObjectFile *OwningObject = nullptr; public: - DiceRef() : OwningObject(nullptr) { } - + DiceRef() = default; DiceRef(DataRefImpl DiceP, const ObjectFile *Owner); bool operator==(const DiceRef &Other) const; @@ -47,7 +61,7 @@ class DiceRef { DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObjectFile() const; }; -typedef content_iterator dice_iterator; +using dice_iterator = content_iterator; /// ExportEntry encapsulates the current-state-of-the-walk used when doing a /// non-recursive walk of the trie data structure. This allows you to iterate @@ -71,6 +85,7 @@ class ExportEntry { private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const uint8_t *&p); @@ -80,25 +95,26 @@ class ExportEntry { // Represents a node in the mach-o exports trie. struct NodeState { NodeState(const uint8_t *Ptr); + const uint8_t *Start; const uint8_t *Current; - uint64_t Flags; - uint64_t Address; - uint64_t Other; - const char *ImportName; - unsigned ChildCount; - unsigned NextChildIndex; - unsigned ParentStringLength; - bool IsExportNode; + uint64_t Flags = 0; + uint64_t Address = 0; + uint64_t Other = 0; + const char *ImportName = nullptr; + unsigned ChildCount = 0; + unsigned NextChildIndex = 0; + unsigned ParentStringLength = 0; + bool IsExportNode = false; }; ArrayRef Trie; SmallString<256> CumulativeString; SmallVector Stack; - bool Malformed; - bool Done; + bool Malformed = false; + bool Done = false; }; -typedef content_iterator export_iterator; +using export_iterator = content_iterator; // Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry // can be checked and translated. Only the SegIndex/SegOffset pairs from @@ -106,7 +122,7 @@ typedef content_iterator export_iterator; // address() methods below. class BindRebaseSegInfo { public: - BindRebaseSegInfo(const object::MachOObjectFile *Obj); + BindRebaseSegInfo(const MachOObjectFile *Obj); // Used to check a Mach-O Bind or Rebase entry for errors when iterating. const char *checkSegAndOffset(int32_t SegIndex, uint64_t SegOffset, @@ -130,6 +146,7 @@ class BindRebaseSegInfo { int32_t SegmentIndex; }; const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset); + SmallVector Sections; int32_t MaxSegIndex; }; @@ -159,6 +176,7 @@ class MachORebaseEntry { private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const char **error); @@ -167,15 +185,15 @@ class MachORebaseEntry { const MachOObjectFile *O; ArrayRef Opcodes; const uint8_t *Ptr; - uint64_t SegmentOffset; - int32_t SegmentIndex; - uint64_t RemainingLoopCount; - uint64_t AdvanceAmount; - uint8_t RebaseType; + uint64_t SegmentOffset = 0; + int32_t SegmentIndex = -1; + uint64_t RemainingLoopCount = 0; + uint64_t AdvanceAmount = 0; + uint8_t RebaseType = 0; uint8_t PointerSize; - bool Done; + bool Done = false; }; -typedef content_iterator rebase_iterator; +using rebase_iterator = content_iterator; /// MachOBindEntry encapsulates the current state in the decompression of /// binding opcodes. 
This allows you to iterate through the compressed table of @@ -209,6 +227,7 @@ class MachOBindEntry { private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const char **error); @@ -218,21 +237,21 @@ class MachOBindEntry { const MachOObjectFile *O; ArrayRef Opcodes; const uint8_t *Ptr; - uint64_t SegmentOffset; - int32_t SegmentIndex; + uint64_t SegmentOffset = 0; + int32_t SegmentIndex = -1; StringRef SymbolName; - bool LibraryOrdinalSet; - int Ordinal; - uint32_t Flags; - int64_t Addend; - uint64_t RemainingLoopCount; - uint64_t AdvanceAmount; - uint8_t BindType; + bool LibraryOrdinalSet = false; + int Ordinal = 0; + uint32_t Flags = 0; + int64_t Addend = 0; + uint64_t RemainingLoopCount = 0; + uint64_t AdvanceAmount = 0; + uint8_t BindType = 0; uint8_t PointerSize; Kind TableKind; - bool Done; + bool Done = false; }; -typedef content_iterator bind_iterator; +using bind_iterator = content_iterator; class MachOObjectFile : public ObjectFile { public: @@ -240,8 +259,8 @@ class MachOObjectFile : public ObjectFile { const char *Ptr; // Where in memory the load command is. MachO::load_command C; // The command itself. }; - typedef SmallVector LoadCommandList; - typedef LoadCommandList::const_iterator load_command_iterator; + using LoadCommandList = SmallVector; + using load_command_iterator = LoadCommandList::const_iterator; static Expected> create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, @@ -563,7 +582,7 @@ class MachOObjectFile : public ObjectFile { case MachO::PLATFORM_BRIDGEOS: return "bridgeos"; default: std::string ret; - llvm::raw_string_ostream ss(ret); + raw_string_ostream ss(ret); ss << format_hex(platform, 8, true); return ss.str(); } @@ -576,7 +595,7 @@ class MachOObjectFile : public ObjectFile { case MachO::TOOL_LD: return "ld"; default: std::string ret; - llvm::raw_string_ostream ss(ret); + raw_string_ostream ss(ret); ss << format_hex(tools, 8, true); return ss.str(); } @@ -595,7 +614,6 @@ class MachOObjectFile : public ObjectFile { } private: - MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, Error &Err, uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0); @@ -606,23 +624,23 @@ class MachOObjectFile : public ObjectFile { MachO::mach_header_64 Header64; MachO::mach_header Header; }; - typedef SmallVector SectionList; + using SectionList = SmallVector; SectionList Sections; - typedef SmallVector LibraryList; + using LibraryList = SmallVector; LibraryList Libraries; LoadCommandList LoadCommands; - typedef SmallVector LibraryShortName; + using LibraryShortName = SmallVector; using BuildToolList = SmallVector; BuildToolList BuildTools; mutable LibraryShortName LibrariesShortNames; std::unique_ptr BindRebaseSectionTable; - const char *SymtabLoadCmd; - const char *DysymtabLoadCmd; - const char *DataInCodeLoadCmd; - const char *LinkOptHintsLoadCmd; - const char *DyldInfoLoadCmd; - const char *UuidLoadCmd; - bool HasPageZeroSegment; + const char *SymtabLoadCmd = nullptr; + const char *DysymtabLoadCmd = nullptr; + const char *DataInCodeLoadCmd = nullptr; + const char *LinkOptHintsLoadCmd = nullptr; + const char *DyldInfoLoadCmd = nullptr; + const char *UuidLoadCmd = nullptr; + bool HasPageZeroSegment = false; }; /// DiceRef @@ -679,7 +697,7 @@ inline const ObjectFile *DiceRef::getObjectFile() const { return OwningObject; } -} -} +} // end namespace object +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_MACHO_H diff --git 
a/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h b/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h index 713022264ea7..f733f861e2c0 100644 --- a/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h @@ -1,4 +1,4 @@ -//===- ModuleSummaryIndexObjectFile.h - Summary index file implementation -=// +//===- ModuleSummaryIndexObjectFile.h - Summary index file implementation -===// // // The LLVM Compiler Infrastructure // @@ -14,14 +14,22 @@ #ifndef LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H #define LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H -#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include namespace llvm { + class ModuleSummaryIndex; -class Module; namespace object { + class ObjectFile; /// This class is used to read just the module summary index related @@ -41,15 +49,18 @@ class ModuleSummaryIndexObjectFile : public SymbolicFile { void moveSymbolNext(DataRefImpl &Symb) const override { llvm_unreachable("not implemented"); } + std::error_code printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override { llvm_unreachable("not implemented"); return std::error_code(); } + uint32_t getSymbolFlags(DataRefImpl Symb) const override { llvm_unreachable("not implemented"); return 0; } + basic_symbol_iterator symbol_begin() const override { llvm_unreachable("not implemented"); return basic_symbol_iterator(BasicSymbolRef()); @@ -85,7 +96,8 @@ class ModuleSummaryIndexObjectFile : public SymbolicFile { static Expected> create(MemoryBufferRef Object); }; -} + +} // end namespace object /// Parse the module summary index out of an IR file and return the module /// summary index object if found, or nullptr if not. If Identifier is @@ -94,6 +106,7 @@ class ModuleSummaryIndexObjectFile : public SymbolicFile { /// containing minimized bitcode just for the thin link. 
Expected> getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier = ""); -} -#endif +} // end namespace llvm + +#endif // LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H diff --git a/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h b/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h index 333301d5b456..9e9322885388 100644 --- a/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h +++ b/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.h - symbol table for in-memory IR ----------------===// +//===- ModuleSymbolTable.h - symbol table for in-memory IR ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,22 +16,24 @@ #ifndef LLVM_OBJECT_MODULESYMBOLTABLE_H #define LLVM_OBJECT_MODULESYMBOLTABLE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/Triple.h" #include "llvm/IR/Mangler.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Allocator.h" +#include #include #include +#include namespace llvm { class GlobalValue; -class RecordStreamer; class ModuleSymbolTable { public: - typedef std::pair AsmSymbol; - typedef PointerUnion Symbol; + using AsmSymbol = std::pair; + using Symbol = PointerUnion; private: Module *FirstMod = nullptr; @@ -57,6 +59,6 @@ class ModuleSymbolTable { function_ref AsmSymbol); }; -} +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_MODULESYMBOLTABLE_H diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h index 3a0a62d9283b..73c7ce367cb0 100644 --- a/contrib/llvm/include/llvm/Object/RelocVisitor.h +++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h @@ -1,4 +1,4 @@ -//===-- RelocVisitor.h - Visitor for object file relocations -*- C++ -*-===// +//===- RelocVisitor.h - Visitor for object file relocations -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,34 +16,38 @@ #ifndef LLVM_OBJECT_RELOCVISITOR_H #define LLVM_OBJECT_RELOCVISITOR_H +#include "llvm/ADT/Triple.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { namespace object { struct RelocToApply { // The computed value after applying the relevant relocations. - int64_t Value; + int64_t Value = 0; // The width of the value; how many bytes to touch when applying the // relocation. - char Width; + char Width = 0; + + RelocToApply() = default; RelocToApply(int64_t Value, char Width) : Value(Value), Width(Width) {} - RelocToApply() : Value(0), Width(0) {} }; /// @brief Base class for object file relocation visitors. 
class RelocVisitor { public: - explicit RelocVisitor(const ObjectFile &Obj) - : ObjToVisit(Obj), HasError(false) {} + explicit RelocVisitor(const ObjectFile &Obj) : ObjToVisit(Obj) {} // TODO: Should handle multiple applied relocations via either passing in the // previously computed value or just count paired relocations as a single @@ -64,22 +68,22 @@ class RelocVisitor { private: const ObjectFile &ObjToVisit; - bool HasError; + bool HasError = false; RelocToApply visitELF(uint32_t RelocType, RelocationRef R, uint64_t Value) { if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file switch (ObjToVisit.getArch()) { case Triple::x86_64: switch (RelocType) { - case llvm::ELF::R_X86_64_NONE: + case ELF::R_X86_64_NONE: return visitELF_X86_64_NONE(R); - case llvm::ELF::R_X86_64_64: + case ELF::R_X86_64_64: return visitELF_X86_64_64(R, Value); - case llvm::ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC32: return visitELF_X86_64_PC32(R, Value); - case llvm::ELF::R_X86_64_32: + case ELF::R_X86_64_32: return visitELF_X86_64_32(R, Value); - case llvm::ELF::R_X86_64_32S: + case ELF::R_X86_64_32S: return visitELF_X86_64_32S(R, Value); default: HasError = true; @@ -88,9 +92,9 @@ class RelocVisitor { case Triple::aarch64: case Triple::aarch64_be: switch (RelocType) { - case llvm::ELF::R_AARCH64_ABS32: + case ELF::R_AARCH64_ABS32: return visitELF_AARCH64_ABS32(R, Value); - case llvm::ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_ABS64: return visitELF_AARCH64_ABS64(R, Value); default: HasError = true; @@ -99,9 +103,9 @@ class RelocVisitor { case Triple::bpfel: case Triple::bpfeb: switch (RelocType) { - case llvm::ELF::R_BPF_64_64: + case ELF::R_BPF_64_64: return visitELF_BPF_64_64(R, Value); - case llvm::ELF::R_BPF_64_32: + case ELF::R_BPF_64_32: return visitELF_BPF_64_32(R, Value); default: HasError = true; @@ -110,9 +114,9 @@ class RelocVisitor { case Triple::mips64el: case Triple::mips64: switch (RelocType) { - case llvm::ELF::R_MIPS_32: + case ELF::R_MIPS_32: return visitELF_MIPS64_32(R, Value); - case llvm::ELF::R_MIPS_64: + case ELF::R_MIPS_64: return visitELF_MIPS64_64(R, Value); default: HasError = true; @@ -121,9 +125,9 @@ class RelocVisitor { case Triple::ppc64le: case Triple::ppc64: switch (RelocType) { - case llvm::ELF::R_PPC64_ADDR32: + case ELF::R_PPC64_ADDR32: return visitELF_PPC64_ADDR32(R, Value); - case llvm::ELF::R_PPC64_ADDR64: + case ELF::R_PPC64_ADDR64: return visitELF_PPC64_ADDR64(R, Value); default: HasError = true; @@ -131,9 +135,9 @@ class RelocVisitor { } case Triple::systemz: switch (RelocType) { - case llvm::ELF::R_390_32: + case ELF::R_390_32: return visitELF_390_32(R, Value); - case llvm::ELF::R_390_64: + case ELF::R_390_64: return visitELF_390_64(R, Value); default: HasError = true; @@ -141,11 +145,11 @@ class RelocVisitor { } case Triple::sparcv9: switch (RelocType) { - case llvm::ELF::R_SPARC_32: - case llvm::ELF::R_SPARC_UA32: + case ELF::R_SPARC_32: + case ELF::R_SPARC_UA32: return visitELF_SPARCV9_32(R, Value); - case llvm::ELF::R_SPARC_64: - case llvm::ELF::R_SPARC_UA64: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA64: return visitELF_SPARCV9_64(R, Value); default: HasError = true; @@ -153,9 +157,9 @@ class RelocVisitor { } case Triple::amdgcn: switch (RelocType) { - case llvm::ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS32: return visitELF_AMDGPU_ABS32(R, Value); - case llvm::ELF::R_AMDGPU_ABS64: + case ELF::R_AMDGPU_ABS64: return visitELF_AMDGPU_ABS64(R, Value); default: HasError = true; @@ -169,11 +173,11 @@ class RelocVisitor { switch (ObjToVisit.getArch()) 
{ case Triple::x86: switch (RelocType) { - case llvm::ELF::R_386_NONE: + case ELF::R_386_NONE: return visitELF_386_NONE(R); - case llvm::ELF::R_386_32: + case ELF::R_386_32: return visitELF_386_32(R, Value); - case llvm::ELF::R_386_PC32: + case ELF::R_386_PC32: return visitELF_386_PC32(R, Value); default: HasError = true; @@ -181,7 +185,7 @@ class RelocVisitor { } case Triple::ppc: switch (RelocType) { - case llvm::ELF::R_PPC_ADDR32: + case ELF::R_PPC_ADDR32: return visitELF_PPC_ADDR32(R, Value); default: HasError = true; @@ -193,12 +197,12 @@ class RelocVisitor { default: HasError = true; return RelocToApply(); - case llvm::ELF::R_ARM_ABS32: + case ELF::R_ARM_ABS32: return visitELF_ARM_ABS32(R, Value); } case Triple::lanai: switch (RelocType) { - case llvm::ELF::R_LANAI_32: + case ELF::R_LANAI_32: return visitELF_Lanai_32(R, Value); default: HasError = true; @@ -207,7 +211,7 @@ class RelocVisitor { case Triple::mipsel: case Triple::mips: switch (RelocType) { - case llvm::ELF::R_MIPS_32: + case ELF::R_MIPS_32: return visitELF_MIPS_32(R, Value); default: HasError = true; @@ -215,8 +219,8 @@ class RelocVisitor { } case Triple::sparc: switch (RelocType) { - case llvm::ELF::R_SPARC_32: - case llvm::ELF::R_SPARC_UA32: + case ELF::R_SPARC_32: + case ELF::R_SPARC_UA32: return visitELF_SPARC_32(R, Value); default: HasError = true; @@ -224,7 +228,7 @@ class RelocVisitor { } case Triple::hexagon: switch (RelocType) { - case llvm::ELF::R_HEX_32: + case ELF::R_HEX_32: return visitELF_HEX_32(R, Value); default: HasError = true; @@ -483,6 +487,7 @@ class RelocVisitor { } }; -} -} -#endif +} // end namespace object +} // end namespace llvm + +#endif // LLVM_OBJECT_RELOCVISITOR_H diff --git a/contrib/llvm/include/llvm/Object/StackMapParser.h b/contrib/llvm/include/llvm/Object/StackMapParser.h index efea62bb3cb3..0c5e1e38cbaa 100644 --- a/contrib/llvm/include/llvm/Object/StackMapParser.h +++ b/contrib/llvm/include/llvm/Object/StackMapParser.h @@ -1,4 +1,4 @@ -//===-------- StackMapParser.h - StackMap Parsing Support -------*- C++ -*-===// +//===- StackMapParser.h - StackMap Parsing Support --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,11 @@ #define LLVM_CODEGEN_STACKMAPPARSER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Endian.h" +#include +#include +#include #include namespace llvm { @@ -19,12 +23,11 @@ namespace llvm { template class StackMapV2Parser { public: - template class AccessorIterator { public: - AccessorIterator(AccessorT A) : A(A) {} + AccessorIterator& operator++() { A = A.next(); return *this; } AccessorIterator operator++(int) { auto tmp = *this; @@ -48,8 +51,8 @@ class StackMapV2Parser { /// Accessor for function records. class FunctionAccessor { friend class StackMapV2Parser; - public: + public: /// Get the function address. uint64_t getFunctionAddress() const { return read(P); @@ -80,13 +83,12 @@ class StackMapV2Parser { /// Accessor for constants. class ConstantAccessor { friend class StackMapV2Parser; - public: + public: /// Return the value of this constant. uint64_t getValue() const { return read(P); } private: - ConstantAccessor(const uint8_t *P) : P(P) {} const static int ConstantAccessorSize = sizeof(uint64_t); @@ -98,20 +100,16 @@ class StackMapV2Parser { const uint8_t *P; }; - // Forward-declare RecordAccessor so we can friend it below. 
- class RecordAccessor; - enum class LocationKind : uint8_t { Register = 1, Direct = 2, Indirect = 3, Constant = 4, ConstantIndex = 5 }; - /// Accessor for location records. class LocationAccessor { friend class StackMapV2Parser; friend class RecordAccessor; - public: + public: /// Get the Kind for this location. LocationKind getKind() const { return LocationKind(P[KindOffset]); @@ -144,7 +142,6 @@ class StackMapV2Parser { } private: - LocationAccessor(const uint8_t *P) : P(P) {} LocationAccessor next() const { @@ -163,8 +160,8 @@ class StackMapV2Parser { class LiveOutAccessor { friend class StackMapV2Parser; friend class RecordAccessor; - public: + public: /// Get the Dwarf register number for this live-out. uint16_t getDwarfRegNum() const { return read(P + DwarfRegNumOffset); @@ -176,7 +173,6 @@ class StackMapV2Parser { } private: - LiveOutAccessor(const uint8_t *P) : P(P) {} LiveOutAccessor next() const { @@ -194,10 +190,10 @@ class StackMapV2Parser { /// Accessor for stackmap records. class RecordAccessor { friend class StackMapV2Parser; - public: - typedef AccessorIterator location_iterator; - typedef AccessorIterator liveout_iterator; + public: + using location_iterator = AccessorIterator; + using liveout_iterator = AccessorIterator; /// Get the patchpoint/stackmap ID for this record. uint64_t getID() const { @@ -254,7 +250,6 @@ class StackMapV2Parser { return liveout_iterator(getLiveOut(0)); } - /// End iterator for live-outs. liveout_iterator liveouts_end() const { return liveout_iterator(getLiveOut(getNumLiveOuts())); @@ -266,7 +261,6 @@ class StackMapV2Parser { } private: - RecordAccessor(const uint8_t *P) : P(P) {} unsigned getNumLiveOutsOffset() const { @@ -316,9 +310,9 @@ class StackMapV2Parser { } } - typedef AccessorIterator function_iterator; - typedef AccessorIterator constant_iterator; - typedef AccessorIterator record_iterator; + using function_iterator = AccessorIterator; + using constant_iterator = AccessorIterator; + using record_iterator = AccessorIterator; /// Get the version number of this stackmap. (Always returns 2). 
unsigned getVersion() const { return 2; } @@ -413,7 +407,6 @@ class StackMapV2Parser { } private: - template static T read(const uint8_t *P) { return support::endian::read(P); @@ -441,6 +434,6 @@ class StackMapV2Parser { std::vector StackMapRecordOffsets; }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_STACKMAPPARSER_H diff --git a/contrib/llvm/include/llvm/Object/Wasm.h b/contrib/llvm/include/llvm/Object/Wasm.h index 43ad62be68b6..6b6bbe252f65 100644 --- a/contrib/llvm/include/llvm/Object/Wasm.h +++ b/contrib/llvm/include/llvm/Object/Wasm.h @@ -17,6 +17,8 @@ #ifndef LLVM_OBJECT_WASM_H #define LLVM_OBJECT_WASM_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" @@ -47,10 +49,10 @@ class WasmSymbol { class WasmSection { public: - WasmSection() : Type(0), Offset(0) {} + WasmSection() = default; - uint32_t Type; // Section type (See below) - uint32_t Offset; // Offset with in the file + uint32_t Type = 0; // Section type (See below) + uint32_t Offset = 0; // Offset with in the file StringRef Name; // Section name (User-defined sections only) ArrayRef Content; // Section content std::vector Relocations; // Relocations for this section @@ -74,12 +76,15 @@ class WasmObjectFile : public ObjectFile { const std::vector& memories() const { return Memories; } const std::vector& globals() const { return Globals; } const std::vector& exports() const { return Exports; } + const std::vector& elements() const { return ElemSegments; } + const std::vector& dataSegments() const { return DataSegments; } + const std::vector& functions() const { return Functions; } const ArrayRef& code() const { return CodeSection; } uint32_t startFunction() const { return StartFunction; } @@ -178,7 +183,7 @@ class WasmObjectFile : public ObjectFile { std::vector Symbols; std::vector Functions; ArrayRef CodeSection; - uint32_t StartFunction; + uint32_t StartFunction = -1; }; } // end namespace object diff --git a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h index b1af8bbdfa6e..dfeeb8589f82 100644 --- a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -88,7 +88,7 @@ struct Relocation { RelocType Type; uint32_t Index; yaml::Hex32 Offset; - yaml::Hex32 Addend; + int32_t Addend; }; struct DataSegment { diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h index c9828858cce3..1b07c33746e7 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h @@ -79,14 +79,6 @@ inline StringRef getInstrProfValueRangeProfFuncName() { return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; } -/// Return the name of the section containing function coverage mapping -/// data. -std::string getInstrProfCoverageSectionName(const Module *M = nullptr); -/// Similar to the above, but used by host tool (e.g, coverage) which has -/// object format information. The section name returned is not prefixed -/// with segment name. -std::string getInstrProfCoverageSectionNameInObject(bool isCoff); - /// Return the name prefix of variables containing instrumented function names. 
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } diff --git a/contrib/llvm/include/llvm/Support/BranchProbability.h b/contrib/llvm/include/llvm/Support/BranchProbability.h index e8eb50d53eb6..b403d7fbf117 100644 --- a/contrib/llvm/include/llvm/Support/BranchProbability.h +++ b/contrib/llvm/include/llvm/Support/BranchProbability.h @@ -112,6 +112,13 @@ class BranchProbability { return *this; } + BranchProbability &operator*=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (uint64_t(N) * RHS > D) ? D : N * RHS; + return *this; + } + BranchProbability &operator/=(uint32_t RHS) { assert(N != UnknownN && "Unknown probability cannot participate in arithmetics."); @@ -135,6 +142,11 @@ class BranchProbability { return Prob *= RHS; } + BranchProbability operator*(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + BranchProbability operator/(uint32_t RHS) const { BranchProbability Prob(*this); return Prob /= RHS; diff --git a/contrib/llvm/include/llvm/Support/CMakeLists.txt b/contrib/llvm/include/llvm/Support/CMakeLists.txt deleted file mode 100644 index b4b993705745..000000000000 --- a/contrib/llvm/include/llvm/Support/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Figure out if we can track VC revisions. -function(find_first_existing_file out_var) - foreach(file ${ARGN}) - if(EXISTS "${file}") - set(${out_var} "${file}" PARENT_SCOPE) - return() - endif() - endforeach() -endfunction() - -macro(find_first_existing_vc_file out_var path) - find_program(git_executable NAMES git git.exe git.cmd) - # Run from a subdirectory to force git to print an absolute path. - execute_process(COMMAND ${git_executable} rev-parse --git-dir - WORKING_DIRECTORY ${path}/cmake - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_dir) - if(git_result EQUAL 0) - string(STRIP "${git_dir}" git_dir) - set(${out_var} "${git_dir}/logs/HEAD") - else() - find_first_existing_file(${out_var} - "${path}/.svn/wc.db" # SVN 1.7 - "${path}/.svn/entries" # SVN 1.6 - ) - endif() -endmacro() - -find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}") - -# The VC revision include that we want to generate. -set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSRevision.h") - -set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake") - -if(DEFINED llvm_vc) - # Create custom target to generate the VC revision include. - add_custom_command(OUTPUT "${version_inc}" - DEPENDS "${llvm_vc}" "${get_svn_script}" - COMMAND - ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}" - "-DNAME=LLVM_REVISION" - "-DHEADER_FILE=${version_inc}" - -P "${get_svn_script}") - - # Mark the generated header as being generated. - set_source_files_properties("${version_inc}" - PROPERTIES GENERATED TRUE - HEADER_FILE_ONLY TRUE) -else() - file(WRITE "${version_inc}" "") -endif() - -add_custom_target(llvm_vcsrevision_h DEPENDS "${version_inc}") diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h index 29515c231bc4..e3c5de7fbe64 100644 --- a/contrib/llvm/include/llvm/Support/FileSystem.h +++ b/contrib/llvm/include/llvm/Support/FileSystem.h @@ -116,7 +116,9 @@ inline perms &operator&=(perms &l, perms r) { return l; } inline perms operator~(perms x) { - return static_cast(~static_cast(x)); + // Avoid UB by explicitly truncating the (unsigned) ~ result. 
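// Illustrative reasoning for the truncation below: the largest enumerator of
// llvm::sys::fs::perms is perms_not_known = 0xFFFF, so the enumeration's
// range of representable values is only 16 bits wide. Computing ~ in
// 'unsigned' arithmetic also sets the upper 16 bits, for example:
//
//   unsigned raw = ~static_cast<unsigned>(owner_all);   // 0xFFFFFE3F
//
// Converting such an out-of-range value straight back to 'perms' is undefined
// behaviour, so the result is narrowed to the enumeration's value range
// before the final cast to perms.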
+ return static_cast( + static_cast(~static_cast(x))); } class UniqueID { diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h index eb7c27d2ffa5..851ff7d80403 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTree.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h @@ -286,13 +286,13 @@ template class DominatorTreeBase : public DominatorBase { NodeRef NewBBSucc = *GraphT::child_begin(NewBB); std::vector PredBlocks; - for (const auto Pred : children>(NewBB)) + for (const auto &Pred : children>(NewBB)) PredBlocks.push_back(Pred); assert(!PredBlocks.empty() && "No predblocks?"); bool NewBBDominatesNewBBSucc = true; - for (const auto Pred : children>(NewBBSucc)) { + for (const auto &Pred : children>(NewBBSucc)) { if (Pred != NewBB && !dominates(NewBBSucc, Pred) && isReachableFromEntry(Pred)) { NewBBDominatesNewBBSucc = false; diff --git a/contrib/llvm/include/llvm/Support/KnownBits.h b/contrib/llvm/include/llvm/Support/KnownBits.h new file mode 100644 index 000000000000..08d4dedd0ac8 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/KnownBits.h @@ -0,0 +1,43 @@ +//===- llvm/Support/KnownBits.h - Stores known zeros/ones -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class for representing known zeros and ones used by +// computeKnownBits. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_KNOWNBITS_H +#define LLVM_SUPPORT_KNOWNBITS_H + +#include "llvm/ADT/APInt.h" + +namespace llvm { + +// For now this is a simple wrapper around two APInts. +struct KnownBits { + APInt Zero; + APInt One; + + // Default construct Zero and One. + KnownBits() {} + + /// Create a known bits object of BitWidth bits initialized to unknown. + KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {} + + /// Get the bit width of this value. 
+ unsigned getBitWidth() const { + assert(Zero.getBitWidth() == One.getBitWidth() && + "Zero and One should have the same width!"); + return Zero.getBitWidth(); + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h index 6d02e4aba48a..ffea679fab82 100644 --- a/contrib/llvm/include/llvm/Support/YAMLTraits.h +++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h @@ -606,7 +606,7 @@ class IO { template void bitSetCase(T &Val, const char* Str, const T ConstVal) { if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) { - Val = Val | ConstVal; + Val = static_cast(Val | ConstVal); } } @@ -614,7 +614,7 @@ class IO { template void bitSetCase(T &Val, const char* Str, const uint32_t ConstVal) { if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) { - Val = Val | ConstVal; + Val = static_cast(Val | ConstVal); } } diff --git a/contrib/llvm/include/llvm/Target/GlobalISel/Target.td b/contrib/llvm/include/llvm/Target/GlobalISel/Target.td index fa1a424b5895..fd2ebca86d60 100644 --- a/contrib/llvm/include/llvm/Target/GlobalISel/Target.td +++ b/contrib/llvm/include/llvm/Target/GlobalISel/Target.td @@ -30,21 +30,13 @@ def s64 : LLT; // Definitions that inherit from this may also inherit from // GIComplexPatternEquiv to enable the import of SelectionDAG patterns involving // those ComplexPatterns. -class GIComplexOperandMatcher { +class GIComplexOperandMatcher { // The expected type of the root of the match. // // TODO: We should probably support, any-type, any-scalar, and multiple types // in the future. LLT Type = type; - // The operands that result from a successful match - // Should be of the form '(ops ty1, ty2, ...)' where ty1/ty2 are definitions - // that inherit from Operand. - // - // FIXME: Which definition is used for ty1/ty2 doesn't actually matter at the - // moment. Only the number of operands is used. - dag Operands = operands; - // The function that determines whether the operand matches. It should be of // the form: // bool select(const MatchOperand &Root, MatchOperand &Result1) diff --git a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h index 0dc9cf70d335..82a682cf1f7e 100644 --- a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h @@ -1108,7 +1108,7 @@ class TargetInstrInfo : public MCInstrInfo { /// Return the noop instruction to use for a noop. - virtual void getNoopForMachoTarget(MCInst &NopInst) const; + virtual void getNoop(MCInst &NopInst) const; /// Return true for post-incremented instructions. virtual bool isPostIncrement(const MachineInstr &MI) const { diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h index 24039ea10816..51f11e1a9a25 100644 --- a/contrib/llvm/include/llvm/Target/TargetLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetLowering.h @@ -236,6 +236,12 @@ class TargetLoweringBase { return getPointerTy(DL, DL.getAllocaAddrSpace()); } + /// Return the type for operands of fence. + /// TODO: Let fence operands be of i32 type and remove this. + virtual MVT getFenceOperandTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + /// EVT is not used in-tree, but is used by out-of-tree target. /// A documentation for this function would be nice... 
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; @@ -2268,7 +2274,8 @@ class TargetLoweringBase { /// Return true if the value types that can be represented by the specified /// register class are all legal. - bool isLegalRC(const TargetRegisterClass *RC) const; + bool isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const; /// Replace/modify any TargetFrameIndex operands with a targte-dependent /// sequence of memory operands that is recognized by PrologEpilogInserter. @@ -2388,30 +2395,39 @@ class TargetLowering : public TargetLoweringBase { New = N; return true; } - - /// Check to see if the specified operand of the specified instruction is a - /// constant integer. If so, check to see if there are any bits set in the - /// constant that are not demanded. If so, shrink the constant and return - /// true. - bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded); - - /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This - /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be - /// generalized for targets with other types of implicit widening casts. - bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - const SDLoc &dl); - - /// Helper for SimplifyDemandedBits that can simplify an operation with - /// multiple uses. This function uses TLI.SimplifyDemandedBits to - /// simplify Operand \p OpIdx of \p User and then updated \p User with - /// the simplified version. No other uses of \p OpIdx are updated. - /// If \p User is the only user of \p OpIdx, this function behaves exactly - /// like TLI.SimplifyDemandedBits except that it also updates the DAG by - /// calling DCI.CommitTargetLoweringOpt. - bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &Demanded, DAGCombinerInfo &DCI); }; + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + /// Look at Op. 
At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning diff --git a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h index 3f5daea63ab5..4ce6d2ff5e26 100644 --- a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h @@ -40,13 +40,12 @@ class TargetRegisterClass { public: typedef const MCPhysReg* iterator; typedef const MCPhysReg* const_iterator; - typedef const MVT::SimpleValueType* vt_iterator; typedef const TargetRegisterClass* const * sc_iterator; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; const uint16_t SpillSize, SpillAlignment; - const vt_iterator VTs; + const MVT::SimpleValueType *VTs; const uint32_t *SubClassMask; const uint16_t *SuperRegIndices; const LaneBitmask LaneMask; @@ -93,13 +92,6 @@ class TargetRegisterClass { return MC->contains(Reg1, Reg2); } - /// Return the size of the register in bytes, which is also the size - /// of a stack slot allocated to hold a spilled copy of this register. - unsigned getSize() const { return SpillSize; } - - /// Return the minimum required alignment for a register of this class. - unsigned getAlignment() const { return SpillAlignment; } - /// Return the cost of copying a value between two registers in this class. /// A negative number means the register class is very expensive /// to copy e.g. status flag register classes. @@ -109,26 +101,6 @@ class TargetRegisterClass { /// registers. bool isAllocatable() const { return MC->isAllocatable(); } - /// Return true if this TargetRegisterClass has the ValueType vt. - bool hasType(MVT vt) const { - for(int i = 0; VTs[i] != MVT::Other; ++i) - if (MVT(VTs[i]) == vt) - return true; - return false; - } - - /// vt_begin / vt_end - Loop over all of the value types that can be - /// represented by values in this register class. - vt_iterator vt_begin() const { - return VTs; - } - - vt_iterator vt_end() const { - vt_iterator I = VTs; - while (*I != MVT::Other) ++I; - return I; - } - /// Return true if the specified TargetRegisterClass /// is a proper sub-class of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *RC) const { @@ -246,6 +218,7 @@ struct RegClassWeight { class TargetRegisterInfo : public MCRegisterInfo { public: typedef const TargetRegisterClass * const * regclass_iterator; + typedef const MVT::SimpleValueType* vt_iterator; private: const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen const char *const *SubRegIndexNames; // Names of subreg indexes. @@ -327,6 +300,44 @@ class TargetRegisterInfo : public MCRegisterInfo { return Index | (1u << 31); } + /// Return the size in bits of a register from class RC. + unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { + return RC.SpillSize * 8; + } + + /// Return the size in bytes of the stack slot allocated to hold a spilled + /// copy of a register from class RC. + unsigned getSpillSize(const TargetRegisterClass &RC) const { + return RC.SpillSize; + } + + /// Return the minimum required alignment for a spill slot for a register + /// of this class. + unsigned getSpillAlignment(const TargetRegisterClass &RC) const { + return RC.SpillAlignment; + } + + /// Return true if the given TargetRegisterClass has the ValueType T. 
+ bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const { + for (int i = 0; RC.VTs[i] != MVT::Other; ++i) + if (MVT(RC.VTs[i]) == T) + return true; + return false; + } + + /// Loop over all of the value types that can be represented by values + // in the given register class. + vt_iterator legalclasstypes_begin(const TargetRegisterClass &RC) const { + return RC.VTs; + } + + vt_iterator legalclasstypes_end(const TargetRegisterClass &RC) const { + vt_iterator I = RC.VTs; + while (*I != MVT::Other) + ++I; + return I; + } + /// Returns the Register Class of a physical register of the given type, /// picking the most sub register class of the right type that contains this /// physreg. diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h index 01a3975a4f2c..db6723da1e61 100644 --- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h +++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h @@ -131,7 +131,8 @@ FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false); ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false, - bool Recover = false); + bool Recover = false, + bool UseGlobalsGC = true); // Insert MemorySanitizer instrumentation (detection of uninitialized reads) FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0, diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h index 3e2b3327a9fe..edc91add7a73 100644 --- a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -36,6 +36,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -98,7 +99,7 @@ class ConstantHoistingPass : public PassInfoMixin { // Glue for old PM. bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - BasicBlock &Entry); + BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { ConstantVec.clear(); @@ -112,6 +113,7 @@ class ConstantHoistingPass : public PassInfoMixin { const TargetTransformInfo *TTI; DominatorTree *DT; + BlockFrequencyInfo *BFI; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. 
@@ -124,8 +126,8 @@ class ConstantHoistingPass : public PassInfoMixin { SmallVector ConstantVec; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - Instruction *findConstantInsertionPoint( - const consthoist::ConstantInfo &ConstInfo) const; + SmallPtrSet + findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3db041cc0fa6..537823020301 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -924,8 +924,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V2Size, const DataLayout &DL) { - assert(GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && + assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && "Expected GEPs with the same pointer operand"); @@ -1184,8 +1184,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // If we know the two GEPs are based off of the exact same pointer (and not // just the same underlying object), see if that tells us anything about // the resulting pointers. - if (GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && + if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) { AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. @@ -1500,8 +1500,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Strip off any casts if they exist. - V1 = V1->stripPointerCasts(); - V2 = V2->stripPointerCasts(); + V1 = V1->stripPointerCastsAndBarriers(); + V2 = V2->stripPointerCastsAndBarriers(); // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. 
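[Editor's aside, not part of the patch] The ConstantFolding, DemandedBits and InstructionSimplify hunks that follow replace the paired KnownZero/KnownOne APInt out-parameters of computeKnownBits with a single KnownBits struct. A minimal sketch of the new calling convention, assuming only what the hunks themselves show (KnownBits is constructed from a bit width and exposes public APInt members Zero and One); the helper name is illustrative:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Returns true if every bit of V is known to be zero.
    static bool isKnownAllZero(Value *V, unsigned BitWidth, const DataLayout &DL) {
      KnownBits Known(BitWidth);      // replaces: APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      computeKnownBits(V, Known, DL); // one out-parameter instead of two APInts
      return Known.Zero.isAllOnesValue();
    }
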
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 14176dac2104..863fbdba7e67 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include @@ -687,21 +688,21 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, if (Opc == Instruction::And) { unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); - APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); - APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - computeKnownBits(Op0, KnownZero0, KnownOne0, DL); - computeKnownBits(Op1, KnownZero1, KnownOne1, DL); - if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + KnownBits Known0(BitWidth); + KnownBits Known1(BitWidth); + computeKnownBits(Op0, Known0, DL); + computeKnownBits(Op1, Known1, DL); + if ((Known1.One | Known0.Zero).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; } - if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + if ((Known0.One | Known1.Zero).isAllOnesValue()) { // All the bits of Op1 that the 'and' could be masking are already zero. return Op1; } - APInt KnownZero = KnownZero0 | KnownZero1; - APInt KnownOne = KnownOne0 & KnownOne1; + APInt KnownZero = Known0.Zero | Known1.Zero; + APInt KnownOne = Known0.One & Known1.One; if ((KnownZero | KnownOne).isAllOnesValue()) { return ConstantInt::get(Op0->getType(), KnownOne); } diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index 151c0b0e6c93..285339deaaf5 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -72,8 +73,7 @@ static bool isAlwaysLive(Instruction *I) { void DemandedBits::determineLiveOperandBits( const Instruction *UserI, const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2) { + const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) { unsigned BitWidth = AB.getBitWidth(); // We're called once per operand, but for some instructions, we need to @@ -85,15 +85,13 @@ void DemandedBits::determineLiveOperandBits( auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1, const Value *V2) { const DataLayout &DL = I->getModule()->getDataLayout(); - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast(V1), KnownZero, KnownOne, DL, 0, + Known = KnownBits(BitWidth); + computeKnownBits(const_cast(V1), Known, DL, 0, &AC, UserI, &DT); if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast(V2), KnownZero2, KnownOne2, DL, + Known2 = KnownBits(BitWidth); + computeKnownBits(const_cast(V2), Known2, DL, 0, &AC, UserI, &DT); } }; @@ -120,7 +118,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. 
ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countLeadingZeros()+1)); + std::min(BitWidth, Known.One.countLeadingZeros()+1)); } break; case Intrinsic::cttz: @@ -130,7 +128,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countTrailingZeros()+1)); + std::min(BitWidth, Known.One.countTrailingZeros()+1)); } break; } @@ -200,11 +198,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownZero2; + AB &= ~Known2.Zero; } else { if (!isa(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownZero & ~KnownZero2); + AB &= ~(Known.Zero & ~Known2.Zero); } break; case Instruction::Or: @@ -216,11 +214,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownOne2; + AB &= ~Known2.One; } else { if (!isa(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownOne & ~KnownOne2); + AB &= ~(Known.One & ~Known2.One); } break; case Instruction::Xor: @@ -318,7 +316,7 @@ void DemandedBits::performAnalysis() { if (!UserI->getType()->isIntegerTy()) Visited.insert(UserI); - APInt KnownZero, KnownOne, KnownZero2, KnownOne2; + KnownBits Known, Known2; // Compute the set of alive bits for each operand. These are anded into the // existing set, if any, and if that changes the set of alive bits, the // operand is added to the work-list. @@ -335,8 +333,7 @@ void DemandedBits::performAnalysis() { // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, - KnownZero, KnownOne, - KnownZero2, KnownOne2); + Known, Known2); } // If we've added to the set of alive bits (or the operand has not diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp index 7acfb41500d4..8abc0e7d0df9 100644 --- a/contrib/llvm/lib/Analysis/DomPrinter.cpp +++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp @@ -80,6 +80,22 @@ struct DOTGraphTraits }; } +void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + +void DominatorTree::viewGraph() { +#ifndef NDEBUG + this->viewGraph("domtree", "Dominator Tree for function"); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + namespace { struct DominatorTreeWrapperPassAnalysisGraphTraits { static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) { diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index fde805a5fde5..c30feb973e60 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -253,18 +253,8 @@ bool IVUsers::AddUsersImpl(Instruction *I, const SCEV *OriginalISE = ISE; auto NormalizePred = [&](const SCEVAddRecExpr *AR) { - // We only allow affine AddRecs to be normalized, otherwise we would not - // be able to correctly denormalize. - // e.g. 
{1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} - // Normalized form: {-2,+,1,+,2} - // Denormalized form: {1,+,3,+,2} - // - // However, denormalization would use a different step expression than - // normalization (see getPostIncExpr), generating the wrong final - // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} auto *L = AR->getLoop(); - bool Result = - AR->isAffine() && IVUseShouldUsePostIncValue(User, I, L, DT); + bool Result = IVUseShouldUsePostIncValue(User, I, L, DT); if (Result) NewUse.PostIncLoops.insert(L); return Result; diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index 1f8dec2aed80..788f908bafca 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -1556,7 +1556,6 @@ InlineParams llvm::getInlineParams(int Threshold) { // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. Params.ColdCallSiteThreshold = ColdCallSiteThreshold; - // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that // option is present, then its value applies even for callees with size and diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 2259fbaeb982..e720e3ebecdb 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/KnownBits.h" #include using namespace llvm; using namespace llvm::PatternMatch; @@ -46,34 +47,19 @@ enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); -namespace { -struct Query { - const DataLayout &DL; - const TargetLibraryInfo *TLI; - const DominatorTree *DT; - AssumptionCache *AC; - const Instruction *CxtI; - - Query(const DataLayout &DL, const TargetLibraryInfo *tli, - const DominatorTree *dt, AssumptionCache *ac = nullptr, - const Instruction *cxti = nullptr) - : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} -}; -} // end anonymous namespace - -static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, +static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const Query &, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + const SimplifyQuery &, unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse); -static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); + const SimplifyQuery &Q, unsigned MaxRecurse); +static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, - const Query &, unsigned); + const SimplifyQuery &, unsigned); /// For a 
boolean type or a vector of boolean type, return false or a vector /// with every element false. @@ -138,7 +124,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, - Instruction::BinaryOps OpcodeToExpand, const Query &Q, + Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -196,7 +182,7 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS, const Query &Q, + Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -295,7 +281,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, /// of the select results in the same value. Returns the common value if so, /// otherwise returns null. static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -367,7 +353,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -449,7 +435,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -492,7 +478,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, /// yields the same result every time. If so returns the common result, /// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
if (!MaxRecurse--) return nullptr; @@ -527,7 +513,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, Value *&Op0, Value *&Op1, - const Query &Q) { + const SimplifyQuery &Q) { if (auto *CLHS = dyn_cast(Op0)) { if (auto *CRHS = dyn_cast(Op1)) return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); @@ -542,7 +528,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; @@ -601,10 +587,15 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Query) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); +} + /// \brief Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and @@ -679,7 +670,7 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, /// Given operands for a Sub, see if we can fold the result. /// If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) return C; @@ -703,10 +694,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Op0; unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownZero.isMaxSignedValue()) { + KnownBits Known(BitWidth); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) @@ -813,14 +803,19 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); +} + /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. 
static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; @@ -854,7 +849,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; @@ -886,7 +881,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given the operands for an FMul, see if we can fold the result static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) return C; @@ -903,7 +898,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) return C; @@ -963,34 +958,52 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFAddInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFSubInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFMulInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); +} + Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return 
::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. @@ -1047,7 +1060,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { /// Given operands for an SDiv or UDiv, see if we can fold the result. /// If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1103,7 +1116,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1115,13 +1128,16 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a UDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1143,12 +1159,15 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; @@ -1193,14 +1212,19 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFDivInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); +} + /// Given operands for an SRem or URem, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1231,7 +1255,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1243,13 +1267,16 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a URem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1271,12 +1298,15 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; @@ -1302,10 +1332,15 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFRemInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); +} + /// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast(Amount); @@ -1336,7 +1371,7 @@ static bool isUndefShift(Value *Amount) { /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, const Query &Q, unsigned MaxRecurse) { + Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1367,17 +1402,15 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownOne.getLimitedValue() >= BitWidth) + KnownBits Known(BitWidth); + computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.One.getLimitedValue() >= BitWidth) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); - APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits); - if ((KnownZero & ShiftAmountMask) == ShiftAmountMask) + if (Known.Zero.countTrailingOnes() >= NumValidShiftBits) return Op0; return nullptr; @@ -1386,7 +1419,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, /// \brief Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, bool isExact, const Query &Q, + Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) return V; @@ -1403,11 +1436,9 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - APInt Op0KnownZero(BitWidth, 0); - APInt Op0KnownOne(BitWidth, 0); - computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (Op0KnownOne[0]) + KnownBits Op0Known(BitWidth); + computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + if (Op0Known.One[0]) return Op0; } @@ -1417,7 +1448,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, /// Given operands for an Shl, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; @@ -1437,14 +1468,19 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); +} + /// Given operands for an LShr, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1462,14 +1498,19 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyLShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); +} + /// Given operands for an AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1496,10 +1537,15 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyAShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); +} + static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; @@ -1575,6 +1621,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) return X; + // FIXME: This should be shared with or-of-icmps. // Look for this pattern: (icmp V, C0) & (icmp V, C1)). Type *ITy = Op0->getType(); ICmpInst::Predicate Pred0, Pred1; @@ -1584,10 +1631,16 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) { // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. - auto Range0 = ConstantRange::makeAllowedICmpRegion(Pred0, *C0); - auto Range1 = ConstantRange::makeAllowedICmpRegion(Pred1, *C1); + auto Range0 = ConstantRange::makeExactICmpRegion(Pred0, *C0); + auto Range1 = ConstantRange::makeExactICmpRegion(Pred1, *C1); if (Range0.intersectWith(Range1).isEmptySet()) return getFalse(ITy); + + // If a range is a superset of the other, the smaller set is all we need. + if (Range0.contains(Range1)) + return Op1; + if (Range1.contains(Range0)) + return Op0; } // (icmp (add V, C0), C1) & (icmp V, C0) @@ -1633,7 +1686,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// Given operands for an And, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) return C; @@ -1744,8 +1797,11 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); } /// Commuted variants are assumed to be handled by calling this function again @@ -1830,7 +1886,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// Given operands for an Or, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) return C; @@ -1877,6 +1933,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, (A == Op0 || B == Op0)) return Constant::getAllOnesValue(Op0->getType()); + // (A & ~B) | (A ^ B) -> (A ^ B) + // (~B & A) | (A ^ B) -> (A ^ B) + // (A & ~B) | (B ^ A) -> (B ^ A) + // (~B & A) | (B ^ A) -> (B ^ A) + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && + (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // Commute the 'or' operands. + // (A ^ B) | (A & ~B) -> (A ^ B) + // (A ^ B) | (~B & A) -> (A ^ B) + // (B ^ A) | (A & ~B) -> (B ^ A) + // (B ^ A) | (~B & A) -> (B ^ A) + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + if (auto *ICILHS = dyn_cast(Op0)) { if (auto *ICIRHS = dyn_cast(Op1)) { if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS)) @@ -1952,13 +2027,16 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a Xor, see if we can fold the result. /// If not, this returns null. 
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) return C; @@ -2001,10 +2079,14 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); +} + + static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -2238,7 +2320,7 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, /// Fold an icmp when its operands have i1 scalar type. static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. if (!OpTy->getScalarType()->isIntegerTy(1)) @@ -2301,7 +2383,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, /// Try hard to fold icmp with zero RHS because this is a common case. static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { if (!match(RHS, m_Zero())) return nullptr; @@ -2556,7 +2638,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, } static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. @@ -2866,7 +2948,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, /// Simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. Value *A, *B; @@ -3070,7 +3152,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); @@ -3342,11 +3424,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const APInt *RHSVal; if (match(RHS, m_APInt(RHSVal))) { unsigned BitWidth = RHSVal->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0)) + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.Zero.intersects(*RHSVal) || + !LHSKnown.One.isSubsetOf(*RHSVal)) return Pred == ICmpInst::ICMP_EQ ? 
ConstantInt::getFalse(ITy) : ConstantInt::getTrue(ITy); } @@ -3372,14 +3453,19 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyICmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); +} + /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const Query &Q, + FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3505,13 +3591,18 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } /// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const Query &Q, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Trivial replacement. if (V == Op) @@ -3659,7 +3750,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, /// Try to simplify a select instruction when its condition operand is an /// integer comparison. static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { ICmpInst::Predicate Pred; Value *CmpLHS, *CmpRHS; @@ -3738,7 +3829,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y @@ -3775,14 +3866,19 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); } /// Given operands for an GetElementPtrInst, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { // The type of the GEP pointer operand. unsigned AS = cast(Ops[0]->getType()->getScalarType())->getAddressSpace(); @@ -3896,14 +3992,18 @@ Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(SrcTy, Ops, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst(SrcTy, Ops, {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, + const SimplifyQuery &Q) { + return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, const Query &Q, + ArrayRef Idxs, const SimplifyQuery &Q, unsigned) { if (Constant *CAgg = dyn_cast(Agg)) if (Constant *CVal = dyn_cast(Val)) @@ -3933,14 +4033,20 @@ Value *llvm::SimplifyInsertValueInst( Value *Agg, Value *Val, ArrayRef Idxs, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyInsertValueInst(Agg, Val, Idxs, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, + const SimplifyQuery &Q) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); +} + /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, - const Query &, unsigned) { + const SimplifyQuery &, unsigned) { if (auto *CAgg = dyn_cast(Agg)) return ConstantFoldExtractValueInstruction(CAgg, Idxs); @@ -3968,13 +4074,18 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyExtractValueInst(Agg, Idxs, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, + const SimplifyQuery &Q) { + return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); +} + /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, unsigned) { if (auto *CVec = dyn_cast(Vec)) { if (auto *CIdx = dyn_cast(Idx)) @@ -4000,12 +4111,17 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, Value *llvm::SimplifyExtractElementInst( Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyExtractElementInst(Vec, Idx, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q) { + return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); +} + /// See if we can fold the given phi. If not, returns null. 
-static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { +static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = nullptr; @@ -4038,7 +4154,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { } static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, - Type *Ty, const Query &Q, unsigned MaxRecurse) { + Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *C = dyn_cast(Op)) return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); @@ -4076,10 +4192,15 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCastInst(CastOpc, Op, Ty, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q) { + return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); +} + /// For the given destination element of a shuffle, peek through shuffles to /// match a root vector source operand that contains that element in the same /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). @@ -4135,7 +4256,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, } static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const Query &Q, + Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *InVecTy = Op0->getType(); unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); @@ -4207,8 +4328,13 @@ Value *llvm::SimplifyShuffleVectorInst( Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShuffleVectorInst( - Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, + {DL, TLI, DT, AC, CxtI}, RecursionLimit); +} + +Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q) { + return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } //=== Helper functions for higher up the class hierarchy. @@ -4216,7 +4342,7 @@ Value *llvm::SimplifyShuffleVectorInst( /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); @@ -4264,7 +4390,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. 
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const Query &Q, + const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: @@ -4284,22 +4410,32 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyBinOp(Opcode, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); +} + Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout &DL, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +} + /// Given operands for a CmpInst, see if we can fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); @@ -4309,10 +4445,15 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); +} + static bool IsIdempotent(Intrinsic::ID ID) { switch (ID) { default: return false; @@ -4403,7 +4544,7 @@ static bool maskIsAllZeroOrUndef(Value *Mask) { template static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { Intrinsic::ID IID = F->getIntrinsicID(); unsigned NumOperands = std::distance(ArgBegin, ArgEnd); @@ -4497,7 +4638,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, template static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); @@ -4535,16 +4676,26 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), + return ::SimplifyCall(V, ArgBegin, ArgEnd, {DL, TLI, 
DT, AC, CxtI}, RecursionLimit); } +Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit); +} + Value *llvm::SimplifyCall(Value *V, ArrayRef Args, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, Args.begin(), Args.end(), - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyCall(V, Args.begin(), Args.end(), {DL, TLI, DT, AC, CxtI}, + RecursionLimit); +} + +Value *llvm::SimplifyCall(Value *V, ArrayRef Args, + const SimplifyQuery &Q) { + return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. @@ -4553,152 +4704,141 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE) { + return SimplifyInstruction(I, {DL, TLI, DT, AC, I}, ORE); +} + +Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, DL, TLI); + Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Mul: - Result = - SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; 
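  // A minimal caller-side sketch of the SimplifyQuery overloads added above
  // (an illustrative sketch only; Opcode, Pred, LHS, RHS and CxtI stand for
  // whatever the caller already has in scope, and the braced initialization
  // mirrors the {DL, TLI, DT, AC, CxtI} call sites in this patch):
  //
  //   const SimplifyQuery Q = {DL, TLI, DT, AC, CxtI};
  //   // Build the query once, then reuse it instead of threading five
  //   // analysis parameters through every individual Simplify* call.
  //   if (Value *V = SimplifyBinOp(Opcode, LHS, RHS, Q))
  //     return V;
  //   if (Value *V = SimplifyCmpInst(Pred, LHS, RHS, Q))
  //     return V;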
case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), - cast(I)->isExact(), DL, TLI, DT, - AC, I); + cast(I)->isExact(), Q); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), - cast(I)->isExact(), DL, TLI, DT, - AC, I); + cast(I)->isExact(), Q); break; case Instruction::And: - Result = - SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Or: - Result = - SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Xor: - Result = - SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::ICmp: - Result = - SimplifyICmpInst(cast(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyICmpInst(cast(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FCmp: - Result = SimplifyFCmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + Result = + SimplifyFCmpInst(cast(I)->getPredicate(), I->getOperand(0), + I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT, AC, I); + I->getOperand(2), Q); break; case Instruction::GetElementPtr: { - SmallVector Ops(I->op_begin(), I->op_end()); + SmallVector Ops(I->op_begin(), I->op_end()); Result = SimplifyGEPInst(cast(I)->getSourceElementType(), - Ops, DL, TLI, DT, AC, I); + Ops, Q); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT, AC, I); + IV->getIndices(), Q); break; } case Instruction::ExtractValue: { auto *EVI = cast(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), - EVI->getIndices(), DL, TLI, DT, AC, I); + EVI->getIndices(), Q); break; } case Instruction::ExtractElement: { auto *EEI = cast(I); - Result = SimplifyExtractElementInst( - EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + Result = SimplifyExtractElementInst(EEI->getVectorOperand(), + EEI->getIndexOperand(), Q); break; } case Instruction::ShuffleVector: { auto *SVI = cast(I); Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), - SVI->getMask(), SVI->getType(), DL, TLI, - DT, AC, I); + SVI->getMask(), SVI->getType(), Q); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), Query(DL, TLI, DT, AC, I)); + Result = SimplifyPHINode(cast(I), Q); break; case Instruction::Call: { CallSite CS(cast(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL, - TLI, DT, AC, I); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST - Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), - DL, TLI, DT, AC, I); + Result = + 
SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q); break; case Instruction::Alloca: // No simplifications for Alloca and it can't be constant folded. @@ -4710,11 +4850,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, // value even when the operands are not all constants. if (!Result && I->getType()->isIntOrIntVectorTy()) { unsigned BitWidth = I->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE); - if ((KnownZero | KnownOne).isAllOnesValue()) - Result = ConstantInt::get(I->getType(), KnownOne); + KnownBits Known(BitWidth); + computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + if ((Known.Zero | Known.One).isAllOnesValue()) + Result = ConstantInt::get(I->getType(), Known.One); } /// If called on unreachable code, the above logic may report that the diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 2ca46b1fe872..0f04af54cdc7 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -70,6 +70,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -534,10 +535,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, - dyn_cast(V), DT); - return KnownZero.isAllOnesValue(); + KnownBits Known(BitWidth); + computeKnownBits(V, Known, DL, 0, AC, dyn_cast(V), DT); + return Known.Zero.isAllOnesValue(); } // Per-component check doesn't work with zeroinitializer @@ -556,9 +556,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, if (isa(Elem)) return true; - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Elem, KnownZero, KnownOne, DL); - if (KnownZero.isAllOnesValue()) + KnownBits Known(BitWidth); + computeKnownBits(Elem, Known, DL); + if (Known.Zero.isAllOnesValue()) return true; } diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp index c63677fe5502..da5c79ab6c81 100644 --- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -29,7 +29,7 @@ #define DEBUG_TYPE "memoryssa" using namespace llvm; -namespace llvm { + // This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join @@ -211,8 +211,8 @@ void MemorySSAUpdater::insertUse(MemoryUse *MU) { } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. -void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, - MemoryAccess *NewDef) { +static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { // Replace any operand with us an incoming block with the new defining // access. 
int i = MP->getBasicBlockIndex(BB); @@ -415,6 +415,7 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { } return MA; } + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -490,5 +491,3 @@ MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter( ++InsertPt->getIterator()); return NewAccess; } - -} // namespace llvm diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 700c383a9dd4..3ac4bf1276eb 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -89,6 +89,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SaveAndRestore.h" @@ -4575,10 +4576,10 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, + KnownBits Known(BitWidth); + computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC, nullptr, &DT); - return Zeros.countTrailingOnes(); + return Known.Zero.countTrailingOnes(); } // SCEVUDivExpr @@ -4757,11 +4758,12 @@ ScalarEvolution::getRange(const SCEV *S, const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); - if (Ones != ~Zeros + 1) + KnownBits Known(BitWidth); + computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT); + if (Known.One != ~Known.Zero + 1) ConservativeResult = - ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + ConservativeResult.intersectWith(ConstantRange(Known.One, + ~Known.Zero + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); @@ -5292,13 +5294,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(), + KnownBits Known(BitWidth); + computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); - if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { + if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; @@ -5328,12 +5330,28 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case Instruction::Or: - // Use ValueTracking to check whether this is actually an add. - if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC, - nullptr, &DT)) { - // There aren't any common bits set, so the add can't wrap. 
- auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW); - return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast(BO->RHS)) { + const SCEV *LHS = getSCEV(BO->LHS); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { + const SCEVAddRecExpr *OldAR = cast(LHS); + const_cast(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } } break; @@ -6063,24 +6081,74 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, return getCouldNotCompute(); } -ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimitFromCond(const Loop *L, - Value *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB, - bool ControlsExit, - bool AllowPredicates) { +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( + const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); + return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); +} + +Optional +ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + (void)this->L; + (void)this->TBB; + (void)this->FBB; + (void)this->AllowPredicates; + + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + auto Itr = TripCountMap.find({ExitCond, ControlsExit}); + if (Itr == TripCountMap.end()) + return None; + return Itr->second; +} + +void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, + bool AllowPredicates, + const ExitLimit &EL) { + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + + auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); + assert(InsertResult.second && "Expected successful insertion!"); + (void)InsertResult; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { + + if (auto MaybeEL = + Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) + return *MaybeEL; + + ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); + Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); + return EL; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool 
AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -6124,12 +6192,12 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -10221,84 +10289,75 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } -typedef DenseMap VerifyMap; - -/// replaceSubString - Replaces all occurrences of From in Str with To. -static void replaceSubString(std::string &Str, StringRef From, StringRef To) { - size_t Pos = 0; - while ((Pos = Str.find(From, Pos)) != std::string::npos) { - Str.replace(Pos, From.size(), To.data(), To.size()); - Pos += To.size(); - } -} - -/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. -static void -getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { - std::string &S = Map[L]; - if (S.empty()) { - raw_string_ostream OS(S); - SE.getBackedgeTakenCount(L)->print(OS); - - // false and 0 are semantically equivalent. This can happen in dead loops. - replaceSubString(OS.str(), "false", "0"); - // Remove wrap flags, their use in SCEV is highly fragile. - // FIXME: Remove this when SCEV gets smarter about them. - replaceSubString(OS.str(), "", ""); - replaceSubString(OS.str(), "", ""); - replaceSubString(OS.str(), "", ""); - } - - for (auto *R : reverse(*L)) - getLoopBackedgeTakenCounts(R, Map, SE); // recurse. -} - void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); - - // Gather stringified backedge taken counts for all loops using SCEV's caches. - // FIXME: It would be much better to store actual values instead of strings, - // but SCEV pointers will change if we drop the caches. 
- VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); - - // Gather stringified backedge taken counts for all loops using a fresh - // ScalarEvolution object. ScalarEvolution SE2(F, TLI, AC, DT, LI); - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2); - // Now compare whether they're the same with and without caches. This allows - // verifying that no pass changed the cache. - assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && - "New loops suddenly appeared!"); + SmallVector LoopStack(LI.begin(), LI.end()); - for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), - OldE = BackedgeDumpsOld.end(), - NewI = BackedgeDumpsNew.begin(); - OldI != OldE; ++OldI, ++NewI) { - assert(OldI->first == NewI->first && "Loop order changed!"); + // Map's SCEV expressions from one ScalarEvolution "universe" to another. + struct SCEVMapper : public SCEVRewriteVisitor { + const SCEV *visitConstant(const SCEVConstant *Constant) { + return SE.getConstant(Constant->getAPInt()); + } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return SE.getUnknown(Expr->getValue()); + } - // Compare the stringified SCEVs. We don't care if undef backedgetaken count - // changes. - // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This - // means that a pass is buggy or SCEV has to learn a new pattern but is - // usually not harmful. - if (OldI->second != NewI->second && - OldI->second.find("undef") == std::string::npos && - NewI->second.find("undef") == std::string::npos && - OldI->second != "***COULDNOTCOMPUTE***" && - NewI->second != "***COULDNOTCOMPUTE***") { - dbgs() << "SCEVValidator: SCEV for loop '" - << OldI->first->getHeader()->getName() - << "' changed from '" << OldI->second - << "' to '" << NewI->second << "'!\n"; + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return SE.getCouldNotCompute(); + } + SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} + }; + + SCEVMapper SCM(SE2); + + while (!LoopStack.empty()) { + auto *L = LoopStack.pop_back_val(); + LoopStack.insert(LoopStack.end(), L->begin(), L->end()); + + auto *CurBECount = SCM.visit( + const_cast(this)->getBackedgeTakenCount(L)); + auto *NewBECount = SE2.getBackedgeTakenCount(L); + + if (CurBECount == SE2.getCouldNotCompute() || + NewBECount == SE2.getCouldNotCompute()) { + // NB! This situation is legal, but is very suspicious -- whatever pass + // change the loop to make a trip count go from could not compute to + // computable or vice-versa *should have* invalidated SCEV. However, we + // choose not to assert here (for now) since we don't want false + // positives. + continue; + } + + if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) { + // SCEV treats "undef" as an unknown but consistent value (i.e. it does + // not propagate undef aggressively). This means we can (and do) fail + // verification in cases where a transform makes the trip count of a loop + // go from "undef" to "undef+1" (say). The transform is fine, since in + // both cases the loop iterates "undef" times, but SCEV thinks we + // increased the trip count of the loop by 1 incorrectly. 
+ continue; + } + + if (SE.getTypeSizeInBits(CurBECount->getType()) > + SE.getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); + else if (SE.getTypeSizeInBits(CurBECount->getType()) < + SE.getTypeSizeInBits(NewBECount->getType())) + CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); + + auto *ConstantDelta = + dyn_cast(SE2.getMinusSCEV(CurBECount, NewBECount)); + + if (ConstantDelta && ConstantDelta->getAPInt() != 0) { + dbgs() << "Trip Count Changed!\n"; + dbgs() << "Old: " << *CurBECount << "\n"; + dbgs() << "New: " << *NewBECount << "\n"; + dbgs() << "Delta: " << *ConstantDelta << "\n"; std::abort(); } } - - // TODO: Verify more things. } bool ScalarEvolution::invalidate( diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index 2aaa4c1ae117..54c44c8e542d 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -51,40 +51,47 @@ NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) { transform(AR->operands(), std::back_inserter(Operands), [&](const SCEV *Op) { return visit(Op); }); - // Conservatively use AnyWrap until/unless we need FlagNW. - const SCEV *Result = - SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); - switch (Kind) { - case Normalize: - // We want to normalize step expression, because otherwise we might not be - // able to denormalize to the original expression. + if (!Pred(AR)) + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); + + // Normalization and denormalization are fancy names for decrementing and + // incrementing a SCEV expression with respect to a set of loops. Since + // Pred(AR) has returned true, we know we need to normalize or denormalize AR + // with respect to its loop. + + if (Kind == Denormalize) { + // Denormalization / "partial increment" is essentially the same as \c + // SCEVAddRecExpr::getPostIncExpr. Here we use an explicit loop to make the + // symmetry with Normalization clear. + for (int i = 0, e = Operands.size() - 1; i < e; i++) + Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]); + } else { + assert(Kind == Normalize && "Only two possibilities!"); + + // Normalization / "partial decrement" is a bit more subtle. Since + // incrementing a SCEV expression (in general) changes the step of the SCEV + // expression as well, we cannot use the step of the current expression. + // Instead, we have to use the step of the very expression we're trying to + // compute! // - // Here is an example what will happen if we don't normalize step: - // ORIGINAL ISE: - // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> - // NORMALIZED ISE: - // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, - // (100 /u {0,+,1}<%bb16>)}<%bb25> - // DENORMALIZED BACK ISE: - // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, - // (100 /u {1,+,1}<%bb16>)}<%bb25> - // Note that the initial value changes after normalization + - // denormalization, which isn't correct. - if (Pred(AR)) { - const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); - Result = SE.getMinusSCEV(Result, TransformedStep); - } - break; - case Denormalize: - // Here we want to normalize step expressions for the same reasons, as - // stated above. 
- if (Pred(AR)) { - const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); - Result = SE.getAddExpr(Result, TransformedStep); - } - break; + // We solve the issue by recursively building up the result, starting from + // the "least significant" operand in the add recurrence: + // + // Base case: + // Single operand add recurrence. It's its own normalization. + // + // N-operand case: + // {S_{N-1},+,S_{N-2},+,...,+,S_0} = S + // + // Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its + // normalization by induction. We subtract the normalized step + // recurrence from S_{N-1} to get the normalization of S. + + for (int i = Operands.size() - 2; i >= 0; i--) + Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]); } - return Result; + + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); } const SCEV *llvm::normalizeForPostIncUse(const SCEV *S, diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 900a2363e60d..af964b6259bb 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include @@ -130,15 +131,15 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { return nullptr; } -static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q); -void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE) { - ::computeKnownBits(V, KnownZero, KnownOne, Depth, + ::computeKnownBits(V, Known, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } @@ -151,11 +152,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); IntegerType *IT = cast(LHS->getType()->getScalarType()); - APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); - return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); + KnownBits LHSKnown(IT->getBitWidth()); + KnownBits RHSKnown(IT->getBitWidth()); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); + return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, @@ -252,67 +253,65 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &KnownOut, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = KnownOut.getBitWidth(); // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. 
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); + KnownBits LHSKnown(BitWidth); + computeKnownBits(Op0, LHSKnown, Depth + 1, Q); + computeKnownBits(Op1, Known2, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. uint64_t CarryIn = 0; if (!Add) { // Sum = LHS + ~RHS + 1 - std::swap(KnownZero2, KnownOne2); + std::swap(Known2.Zero, Known2.One); CarryIn = 1; } - APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn; - APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn; + APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn; + APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn; // Compute known bits of the carry. - APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2); - APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2; + APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero); + APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One; // Compute set of known bits (where all three relevant bits are known). - APInt LHSKnown = LHSKnownZero | LHSKnownOne; - APInt RHSKnown = KnownZero2 | KnownOne2; - APInt CarryKnown = CarryKnownZero | CarryKnownOne; - APInt Known = LHSKnown & RHSKnown & CarryKnown; + APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One; + APInt RHSKnownUnion = Known2.Zero | Known2.One; + APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne; + APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion; assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && "known bits of sum differ"); // Compute known bits of the result. - KnownZero = ~PossibleSumOne & Known; - KnownOne = PossibleSumOne & Known; + KnownOut.Zero = ~PossibleSumOne & Known; + KnownOut.One = PossibleSumOne & Known; // Are we still trying to solve for the sign bit? if (!Known.isSignBitSet()) { if (NSW) { // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. - if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) - KnownZero.setSignBit(); + if (LHSKnown.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) + KnownOut.Zero.setSignBit(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. - else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) - KnownOne.setSignBit(); + else if (LHSKnown.One.isSignBitSet() && Known2.One.isSignBitSet()) + KnownOut.One.setSignBit(); } } } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q); + unsigned BitWidth = Known.getBitWidth(); + computeKnownBits(Op1, Known, Depth + 1, Q); + computeKnownBits(Op0, Known2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -322,10 +321,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // The product of a number with itself is non-negative. 
isKnownNonNegative = true; } else { - bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet(); - bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet(); - bool isKnownNegativeOp1 = KnownOne.isSignBitSet(); - bool isKnownNegativeOp0 = KnownOne2.isSignBitSet(); + bool isKnownNonNegativeOp1 = Known.Zero.isSignBitSet(); + bool isKnownNonNegativeOp0 = Known2.Zero.isSignBitSet(); + bool isKnownNegativeOp1 = Known.One.isSignBitSet(); + bool isKnownNegativeOp0 = Known2.One.isSignBitSet(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); @@ -343,28 +342,28 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + Known.One.clearAllBits(); + unsigned TrailZ = Known.Zero.countTrailingOnes() + + Known2.Zero.countTrailingOnes(); + unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + + Known2.Zero.countLeadingOnes(), BitWidth) - BitWidth; TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); - KnownZero.clearAllBits(); - KnownZero.setLowBits(TrailZ); - KnownZero.setHighBits(LeadZ); + Known.Zero.clearAllBits(); + Known.Zero.setLowBits(TrailZ); + Known.Zero.setHighBits(LeadZ); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. - if (isKnownNonNegative && !KnownOne.isSignBitSet()) - KnownZero.setSignBit(); - else if (isKnownNegative && !KnownZero.isSignBitSet()) - KnownOne.setSignBit(); + if (isKnownNonNegative && !Known.One.isSignBitSet()) + Known.Zero.setSignBit(); + else if (isKnownNegative && !Known.Zero.isSignBitSet()) + Known.One.setSignBit(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, @@ -499,15 +498,14 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return !isEphemeralValueOf(Inv, CxtI); } -static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { +static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, + unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! if (!Q.AC || !Q.CxtI) return; - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. 
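The hunks below continue the same mechanical migration inside the assume-driven reasoning: every pair of APInt masks (KnownZero/KnownOne) becomes a single KnownBits value. A minimal before/after sketch of the pattern, assuming the usual ValueTracking.h and KnownBits.h includes; isKnownZeroOld and isKnownZeroNew are hypothetical helper names used only for illustration:

  // Old style (removed by this patch): two parallel APInt masks.
  static bool isKnownZeroOld(const Value *V, unsigned BitWidth,
                             const DataLayout &DL) {
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL);
    return KnownZero.isAllOnesValue();  // every bit known to be zero
  }

  // New style: one KnownBits object owns both masks.
  static bool isKnownZeroNew(const Value *V, unsigned BitWidth,
                             const DataLayout &DL) {
    KnownBits Known(BitWidth);
    computeKnownBits(V, Known, DL);
    return Known.Zero.isAllOnesValue();  // same query via Known.Zero
  }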
@@ -532,15 +530,15 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.clearAllBits(); - KnownOne.setAllBits(); + Known.Zero.clearAllBits(); + Known.One.setAllBits(); return; } if (match(Arg, m_Not(m_Specific(V))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.setAllBits(); - KnownOne.clearAllBits(); + Known.Zero.setAllBits(); + Known.One.clearAllBits(); return; } @@ -558,126 +556,126 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - KnownZero |= RHSKnownZero; - KnownOne |= RHSKnownOne; + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + Known.Zero |= RHSKnown.Zero; + Known.One |= RHSKnown.One; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. - KnownZero |= RHSKnownZero & MaskKnownOne; - KnownOne |= RHSKnownOne & MaskKnownOne; + Known.Zero |= RHSKnown.Zero & MaskKnown.One; + Known.One |= RHSKnown.One & MaskKnown.One; // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. 
- KnownZero |= RHSKnownOne & MaskKnownOne; - KnownOne |= RHSKnownZero & MaskKnownOne; + Known.Zero |= RHSKnown.One & MaskKnown.One; + Known.One |= RHSKnown.Zero & MaskKnown.One; // assume(v | b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. - KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; // assume(v ^ b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, // we can propagate inverted known bits from the RHS to V. 
- KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; - KnownZero |= RHSKnownOne & BKnownOne; - KnownOne |= RHSKnownZero & BKnownOne; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; + Known.Zero |= RHSKnown.One & BKnown.One; + Known.One |= RHSKnown.Zero & BKnown.One; // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are // known to be one, we can propagate known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; - KnownZero |= RHSKnownZero & BKnownOne; - KnownOne |= RHSKnownOne & BKnownOne; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; + Known.Zero |= RHSKnown.Zero & BKnown.One; + Known.One |= RHSKnown.One & BKnown.One; // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - RHSKnownZero.lshrInPlace(C->getZExtValue()); - KnownZero |= RHSKnownZero; - RHSKnownOne.lshrInPlace(C->getZExtValue()); - KnownOne |= RHSKnownOne; + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.Zero; + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. 
- RHSKnownOne.lshrInPlace(C->getZExtValue()); - KnownZero |= RHSKnownOne; - RHSKnownZero.lshrInPlace(C->getZExtValue()); - KnownOne |= RHSKnownZero; + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.One; + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), @@ -685,12 +683,12 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - KnownZero |= RHSKnownZero << C->getZExtValue(); - KnownOne |= RHSKnownOne << C->getZExtValue(); + Known.Zero |= RHSKnown.Zero << C->getZExtValue(); + Known.One |= RHSKnown.One << C->getZExtValue(); // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( m_LShr(m_V, m_ConstantInt(C)), @@ -698,78 +696,78 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - KnownZero |= RHSKnownOne << C->getZExtValue(); - KnownOne |= RHSKnownZero << C->getZExtValue(); + Known.Zero |= RHSKnown.One << C->getZExtValue(); + Known.One |= RHSKnown.Zero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isSignBitSet()) { + if (RHSKnown.Zero.isSignBitSet()) { // We know that the sign bit is zero. - KnownZero.setSignBit(); + Known.Zero.setSignBit(); } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) { + if (RHSKnown.One.isAllOnesValue() || RHSKnown.Zero.isSignBitSet()) { // We know that the sign bit is zero. 
- KnownZero.setSignBit(); + Known.Zero.setSignBit(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isSignBitSet()) { + if (RHSKnown.One.isSignBitSet()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.One.setSignBit(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) { + if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.One.isSignBitSet()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.One.setSignBit(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()+1); else - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); } } @@ -778,9 +776,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if (Known.Zero.intersects(Known.One)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); if (Q.ORE) { auto *CxtI = const_cast(Q.CxtI); @@ -793,57 +791,57 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } // Compute known bits from a shift operator, including those with a -// non-constant shift amount. KnownZero and KnownOne are the outputs of this -// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the -// same bit width as KnownZero and KnownOne. 
KZF and KOF are operator-specific -// functors that, given the known-zero or known-one bits respectively, and a -// shift amount, compute the implied known-zero or known-one bits of the shift -// operator's result respectively for that shift amount. The results from calling -// KZF and KOF are conservatively combined for all permitted shift amounts. +// non-constant shift amount. Known is the outputs of this function. Known2 is a +// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are +// operator-specific functors that, given the known-zero or known-one bits +// respectively, and a shift amount, compute the implied known-zero or known-one +// bits of the shift operator's result respectively for that shift amount. The +// results from calling KZF and KOF are conservatively combined for all +// permitted shift amounts. static void computeKnownBitsFromShiftOperator( - const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, - APInt &KnownOne2, unsigned Depth, const Query &Q, + const Operator *I, KnownBits &Known, KnownBits &Known2, + unsigned Depth, const Query &Q, function_ref KZF, function_ref KOF) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); if (auto *SA = dyn_cast(I->getOperand(1))) { unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KZF(KnownZero, ShiftAmt); - KnownOne = KOF(KnownOne, ShiftAmt); - // If there is conflict between KnownZero and KnownOne, this must be an - // overflowing left shift, so the shift result is undefined. Clear KnownZero - // and KnownOne bits so that other code could propagate this undef. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero = KZF(Known.Zero, ShiftAmt); + Known.One = KOF(Known.One, ShiftAmt); + // If there is conflict between Known.Zero and Known.One, this must be an + // overflowing left shift, so the shift result is undefined. Clear Known + // bits so that other code could propagate this undef. + if ((Known.Zero & Known.One) != 0) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); } return; } - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); // If the shift amount could be greater than or equal to the bit-width of the LHS, the // value could be undef, so we don't know anything about it. - if ((~KnownZero).uge(BitWidth)) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if ((~Known.Zero).uge(BitWidth)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); return; } - // Note: We cannot use KnownZero.getLimitedValue() here, because if + // Note: We cannot use Known.Zero.getLimitedValue() here, because if // BitWidth > 64 and any upper bits are known, we'll end up returning the // limit value (which implies all bits are known). - uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); - uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); // It would be more-clearly correct to use the two temporaries for this // calculation. Reusing the APInts here to prevent unnecessary allocations. 
- KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); // If we know the shifter operand is nonzero, we can sometimes infer more // known bits. However this is expensive to compute, so be lazy about it and @@ -858,9 +856,10 @@ static void computeKnownBitsFromShiftOperator( return; } - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { // Combine the shifted known input bits only for those shift amounts // compatible with its known constraints. @@ -879,8 +878,8 @@ static void computeKnownBitsFromShiftOperator( continue; } - KnownZero &= KZF(KnownZero2, ShiftAmt); - KnownOne &= KOF(KnownOne2, ShiftAmt); + Known.Zero &= KZF(Known2.Zero, ShiftAmt); + Known.One &= KOF(Known2.One, ShiftAmt); } // If there are no compatible shift amounts, then we've proven that the shift @@ -888,33 +887,32 @@ static void computeKnownBitsFromShiftOperator( // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if (Known.Zero.intersects(Known.One)) { + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); } } -static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); +static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, + unsigned Depth, const Query &Q) { + unsigned BitWidth = Known.getBitWidth(); - APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + KnownBits Known2(Known); switch (I->getOpcode()) { default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; // and(x, add (x, -1)) is a common idiom that always clears the low bit; // here we handle the more general case of adding any odd number by @@ -922,115 +920,115 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. 
Value *Y = nullptr; - if (!KnownZero[0] && !KnownOne[0] && + if (!Known.Zero[0] && !Known.One[0] && (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), m_Value(Y))) || match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), m_Value(Y))))) { - KnownZero2.clearAllBits(); KnownOne2.clearAllBits(); - computeKnownBits(Y, KnownZero2, KnownOne2, Depth + 1, Q); - if (KnownOne2.countTrailingOnes() > 0) - KnownZero.setBit(0); + Known2.Zero.clearAllBits(); Known2.One.clearAllBits(); + computeKnownBits(Y, Known2, Depth + 1, Q); + if (Known2.One.countTrailingOnes() > 0) + Known.Zero.setBit(0); } break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = std::move(KnownZeroOut); + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = std::move(KnownZeroOut); break; } case Instruction::Mul: { bool NSW = cast(I)->hasNoSignedWrap(); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known, + Known2, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned LeadZ = KnownZero2.countLeadingOnes(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + unsigned LeadZ = Known2.Zero.countLeadingOnes(); - KnownOne2.clearAllBits(); - KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + Known2.One.clearAllBits(); + Known2.Zero.clearAllBits(); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero.setHighBits(LeadZ); + Known.Zero.setHighBits(LeadZ); break; } case Instruction::Select: { const Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { - computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(RHS, Known, Depth + 1, Q); + computeKnownBits(LHS, Known2, Depth + 1, Q); } else { - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); } unsigned MaxHighOnes = 0; unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet()) + if (Known.One.isSignBitSet() && Known2.One.isSignBitSet()) // We can derive a lower bound on the result by taking the max of the // leading one bits. - MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + MaxHighOnes = std::max(Known.One.countLeadingOnes(), + Known2.One.countLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet()) + else if (Known.Zero.isSignBitSet() || Known2.Zero.isSignBitSet()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet()) + if (Known.Zero.isSignBitSet() && Known2.Zero.isSignBitSet()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. - MaxHighZeros = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); + MaxHighZeros = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); // If either side is negative, the result is negative. - else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet()) + else if (Known.One.isSignBitSet() || Known2.One.isSignBitSet()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes()); } else if (SPF == SPF_UMIN) { // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = - std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); + std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes()); } // Only known if known in both the LHS and RHS. 
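A small worked case for the SMAX lower bound above (the i8 values are illustrative only): if the LHS is known to have its top three bits set it lies in [-32, -1], and if the RHS is known to have its top five bits set it lies in [-8, -1]. Since smax(LHS, RHS) >= RHS >= -8 and the result is still negative, it has at least max(3, 5) = 5 leading one bits, which is exactly the MaxHighOnes bound taken above.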
- KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; if (MaxHighOnes > 0) - KnownOne.setHighBits(MaxHighOnes); + Known.One.setHighBits(MaxHighOnes); if (MaxHighZeros > 0) - KnownZero.setHighBits(MaxHighZeros); + Known.Zero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: @@ -1054,14 +1052,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); - KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KnownZero.zextOrTrunc(BitWidth); - KnownOne = KnownOne.zextOrTrunc(BitWidth); + Known.Zero = Known.Zero.zextOrTrunc(SrcBitWidth); + Known.One = Known.One.zextOrTrunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero = Known.Zero.zextOrTrunc(BitWidth); + Known.One = Known.One.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) - KnownZero.setBitsFrom(SrcBitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); break; } case Instruction::BitCast: { @@ -1070,7 +1068,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; } break; @@ -1079,13 +1077,13 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
- KnownZero = KnownZero.sext(BitWidth); - KnownOne = KnownOne.sext(BitWidth); + Known.Zero = Known.Zero.sext(BitWidth); + Known.One = Known.One.sext(BitWidth); break; } case Instruction::Shl: { @@ -1108,9 +1106,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KOResult; }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::LShr: { @@ -1126,9 +1122,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KnownOne.lshr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::AShr: { @@ -1141,23 +1135,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KnownOne.ashr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::Add: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::SRem: @@ -1165,34 +1155,33 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, - Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // The low bits of the first operand are unchanged by the srem. - KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.Zero.isSignBitSet() || ((Known2.Zero & LowBits) == LowBits)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; + if (Known2.One.isSignBitSet() && ((Known2.One & LowBits) != 0)) + Known.One |= ~LowBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If it's known zero, our sign bit is also zero. 
- if (KnownZero2.isSignBitSet()) - KnownZero.setSignBit(); + if (Known2.Zero.isSignBitSet()) + Known.Zero.setSignBit(); break; case Instruction::URem: { @@ -1200,23 +1189,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, const APInt &RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero |= ~LowBits; - KnownOne &= LowBits; + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero |= ~LowBits; + Known.One &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - unsigned Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + unsigned Leaders = std::max(Known.Zero.countLeadingOnes(), + Known2.Zero.countLeadingOnes()); + Known.One.clearAllBits(); + Known.Zero.clearAllBits(); + Known.Zero.setHighBits(Leaders); break; } @@ -1227,16 +1216,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); if (Align > 0) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1, - Q); - unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + KnownBits LocalKnown(BitWidth); + computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); + unsigned TrailZ = LocalKnown.Zero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1266,15 +1254,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy); - LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q); + LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0); + computeKnownBits(Index, LocalKnown, Depth + 1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + - LocalKnownZero.countTrailingOnes())); + LocalKnown.Zero.countTrailingOnes())); } } - KnownZero.setLowBits(TrailZ); + Known.Zero.setLowBits(TrailZ); break; } case Instruction::PHI: { @@ -1309,14 +1297,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. 
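As a hypothetical illustration of the "L op= R" recurrence handled below (not taken from the patch; n and use() are placeholder names): a counter stepped by a multiple of eight from a start value with the same low bits clear keeps those bits clear, which is what the trailing-zero intersection below derives.

for (unsigned i = 0; i != n; i += 8) // i always has its low three bits clear
  use(i);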
- computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(R, Known2, Depth + 1, Q); // We need to take the minimum number of known bits - APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); + KnownBits Known3(Known); + computeKnownBits(L, Known3, Depth + 1, Q); - KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + Known.Zero.setLowBits(std::min(Known2.Zero.countTrailingOnes(), + Known3.Zero.countTrailingOnes())); if (DontImproveNonNegativePhiBits) break; @@ -1333,25 +1321,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // (add non-negative, non-negative) --> non-negative // (add negative, negative) --> negative if (Opcode == Instruction::Add) { - if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet()) - KnownZero.setSignBit(); - else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet()) - KnownOne.setSignBit(); + if (Known2.Zero.isSignBitSet() && Known3.Zero.isSignBitSet()) + Known.Zero.setSignBit(); + else if (Known2.One.isSignBitSet() && Known3.One.isSignBitSet()) + Known.One.setSignBit(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { - if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet()) - KnownZero.setSignBit(); - else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet()) - KnownOne.setSignBit(); + if (Known2.Zero.isSignBitSet() && Known3.One.isSignBitSet()) + Known.Zero.setSignBit(); + else if (Known2.One.isSignBitSet() && Known3.Zero.isSignBitSet()) + Known.One.setSignBit(); } // (mul nsw non-negative, non-negative) --> non-negative - else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() && - KnownZero3.isSignBitSet()) - KnownZero.setSignBit(); + else if (Opcode == Instruction::Mul && Known2.Zero.isSignBitSet() && + Known3.Zero.isSignBitSet()) + Known.Zero.setSignBit(); } break; @@ -1365,27 +1353,26 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. - if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) { // Skip if every incoming value references to ourself. if (dyn_cast_or_null(P->hasConstantValue())) break; - KnownZero.setAllBits(); - KnownOne.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (Value *IncValue : P->incoming_values()) { // Skip direct self references. if (IncValue == P) continue; - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check // more operands. - if (!KnownZero && !KnownOne) + if (!Known.Zero && !Known.One) break; } } @@ -1397,24 +1384,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // and then intersect with known bits based on other properties of the // function. 
if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One); if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { - computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2; - KnownOne |= KnownOne2; + computeKnownBits(RV, Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero; + Known.One |= Known2.One; } if (const IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bitreverse: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.reverseBits(); - KnownOne |= KnownOne2.reverseBits(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.reverseBits(); + Known.One |= Known2.One.reverseBits(); break; case Intrinsic::bswap: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.byteSwap(); - KnownOne |= KnownOne2.byteSwap(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.byteSwap(); + Known.One |= Known2.One.byteSwap(); break; case Intrinsic::ctlz: case Intrinsic::cttz: { @@ -1422,22 +1409,22 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero.setBitsFrom(LowBits); + Known.Zero.setBitsFrom(LowBits); break; } case Intrinsic::ctpop: { - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. - unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned BitsPossiblySet = BitWidth - Known2.Zero.countPopulation(); unsigned LowBits = Log2_32(BitsPossiblySet)+1; - KnownZero.setBitsFrom(LowBits); + Known.Zero.setBitsFrom(LowBits); // TODO: we could bound KnownOne using the lower bound on the number // of bits which might be set provided by popcnt KnownOne2. break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); break; } } @@ -1447,7 +1434,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // tracking the specific element. But at least we might find information // valid for all elements of the vector (for example if vector is sign // extended, shifted, etc). 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; case Instruction::ExtractValue: if (IntrinsicInst *II = dyn_cast(I->getOperand(0))) { @@ -1459,20 +1446,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } } @@ -1481,7 +1467,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } /// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. +/// them in the Known bit set. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -1495,11 +1481,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth, const Query &Q) { +void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarType()->isPointerTy()) && @@ -1507,22 +1493,20 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "V, KnownOne and KnownZero should have same BitWidth"); + "V and Known should have same BitWidth"); (void)BitWidth; const APInt *C; if (match(V, m_APInt(C))) { // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C; - KnownZero = ~KnownOne; + Known.One = *C; + Known.Zero = ~Known.One; return; } // Null and aggregate-zero are all-zeros. if (isa(V) || isa(V)) { - KnownOne.clearAllBits(); - KnownZero.setAllBits(); + Known.One.clearAllBits(); + Known.Zero.setAllBits(); return; } // Handle a constant vector by taking the intersection of the known bits of @@ -1530,12 +1514,12 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const ConstantDataSequential *CDS = dyn_cast(V)) { // We know that CDS must be a vector of integers. Take the intersection of // each element. 
- KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } @@ -1543,25 +1527,25 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const auto *CV = dyn_cast(V)) { // We know that CV must be a vector of integers. Take the intersection of // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null(Element); if (!ElementCI) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); return; } Elt = ElementCI->getValue(); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } // Start out not knowing anything. - KnownZero.clearAllBits(); KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); Known.One.clearAllBits(); // We can't imply anything about undefs. if (isa(V)) @@ -1580,27 +1564,27 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, // the bits of its aliasee. if (const GlobalAlias *GA = dyn_cast(V)) { if (!GA->isInterposable()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); return; } if (const Operator *I = dyn_cast(V)) - computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromOperator(I, Known, Depth, Q); - // Aligned pointers have trailing zeros - refine KnownZero set + // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { unsigned Align = V->getPointerAlignment(Q.DL); if (Align) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); } - // computeKnownBitsFromAssume strictly refines KnownZero and - // KnownOne. Therefore, we run them after computeKnownBitsFromOperator. + // computeKnownBitsFromAssume strictly refines Known. + // Therefore, we run them after computeKnownBitsFromOperator. // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromAssume(V, Known, Depth, Q); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } /// Determine whether the sign bit is known to be zero or one. 
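For reference, a minimal sketch of how a caller uses the refactored interface; V, DL, AC, CxtI and DT are assumed to already be in scope (the latter three as pointers), as in the call sites updated elsewhere in this patch.

KnownBits Known(DL.getTypeSizeInBits(V->getType()->getScalarType()));
computeKnownBits(V, Known, DL, /*Depth=*/0, AC, CxtI, DT);
bool KnownNonNegative = Known.Zero.isSignBitSet(); // sign bit known clear
bool KnownNegative = Known.One.isSignBitSet();     // sign bit known set
unsigned LeadZ = Known.Zero.countLeadingOnes();    // leading bits known zero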
@@ -1613,11 +1597,10 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, KnownOne = false; return; } - APInt ZeroBits(BitWidth, 0); - APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, Depth, Q); - KnownOne = OneBits.isSignBitSet(); - KnownZero = ZeroBits.isSignBitSet(); + KnownBits Bits(BitWidth); + computeKnownBits(V, Bits, Depth, Q); + KnownOne = Bits.One.isSignBitSet(); + KnownZero = Bits.Zero.isSignBitSet(); } /// Return true if the given value is known to have exactly one @@ -1689,18 +1672,18 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); - APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q); + KnownBits LHSBits(BitWidth); + computeKnownBits(X, LHSBits, Depth, Q); - APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q); + KnownBits RHSBits(BitWidth); + computeKnownBits(Y, RHSBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 - if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) // If OrZero isn't set, we cannot give back a zero result. // Make sure either the LHS or RHS has a bit set. - if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) return true; } } @@ -1871,10 +1854,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, Depth, Q); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if (KnownOne[0]) + KnownBits Known(BitWidth); + computeKnownBits(X, Known, Depth, Q); + if (Known.One[0]) return true; } // shr X, Y != 0 if X is negative. Note that the value of the shift is not @@ -1894,16 +1876,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // out are known to be zero, and X is known non-zero then at least one // non-zero bit must remain. if (ConstantInt *Shift = dyn_cast(Y)) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); + KnownBits Known(BitWidth); + computeKnownBits(X, Known, Depth, Q); auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? - if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + if (Known.One.countLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? - if (KnownZero.countTrailingOnes() >= ShiftVal) + if (Known.Zero.countTrailingOnes() >= ShiftVal) return isKnownNonZero(X, Depth, Q); } } @@ -1927,18 +1908,17 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // If X and Y are both negative (as signed values) then their sum is not // zero unless both X and Y equal INT_MIN. if (BitWidth && XKnownNegative && YKnownNegative) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); + KnownBits Known(BitWidth); APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + computeKnownBits(X, Known, Depth, Q); + if (Known.One.intersects(Mask)) return true; // The sign bit of Y is set. 
If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + computeKnownBits(Y, Known, Depth, Q); + if (Known.One.intersects(Mask)) return true; } @@ -1993,10 +1973,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } if (!BitWidth) return false; - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return KnownOne != 0; + KnownBits Known(BitWidth); + computeKnownBits(V, Known, Depth, Q); + return Known.One != 0; } /// Return true if V2 == V1 + X, where X is known non-zero. @@ -2028,14 +2007,13 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. auto BitWidth = Ty->getBitWidth(); - APInt KnownZero1(BitWidth, 0); - APInt KnownOne1(BitWidth, 0); - computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q); - APInt KnownZero2(BitWidth, 0); - APInt KnownOne2(BitWidth, 0); - computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q); + KnownBits Known1(BitWidth); + computeKnownBits(V1, Known1, 0, Q); + KnownBits Known2(BitWidth); + computeKnownBits(V2, Known2, 0, Q); - auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); + APInt OppositeBits = (Known1.Zero & Known2.One) | + (Known2.Zero & Known1.One); if (OppositeBits.getBoolValue()) return true; } @@ -2053,9 +2031,9 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q) { - APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return (KnownZero & Mask) == Mask; + KnownBits Known(Mask.getBitWidth()); + computeKnownBits(V, Known, Depth, Q); + return Mask.isSubsetOf(Known.Zero); } /// For vector constants, loop over the elements and find the constant with the @@ -2233,17 +2211,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isSignBitSet()) + if (Known.Zero.isSignBitSet()) return Tmp; } @@ -2258,16 +2236,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Handle NEG. if (const auto *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. 
- if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isSignBitSet()) + if (Known.Zero.isSignBitSet()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -2319,16 +2297,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) return VecSignBits; - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); + KnownBits Known(TyBits); + computeKnownBits(V, Known, Depth, Q); // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. - if (KnownZero.isSignBitSet()) - return std::max(FirstAnswer, KnownZero.countLeadingOnes()); + if (Known.Zero.isSignBitSet()) + return std::max(FirstAnswer, Known.Zero.countLeadingOnes()); - if (KnownOne.isSignBitSet()) - return std::max(FirstAnswer, KnownOne.countLeadingOnes()); + if (Known.One.isSignBitSet()) + return std::max(FirstAnswer, Known.One.countLeadingOnes()); // computeKnownBits gave us no extra information about the top bits. return FirstAnswer; @@ -3534,26 +3512,22 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // we can guarantee that the result does not overflow. // Ref: "Hacker's Delight" by Henry Warren unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); // Note that underestimating the number of zero bits gives a more // conservative answer. - unsigned ZeroBits = LHSKnownZero.countLeadingOnes() + - RHSKnownZero.countLeadingOnes(); + unsigned ZeroBits = LHSKnown.Zero.countLeadingOnes() + + RHSKnown.Zero.countLeadingOnes(); // First handle the easy case: if we have enough zero bits there's // definitely no overflow. if (ZeroBits >= BitWidth) return OverflowResult::NeverOverflows; // Get the largest possible values for each operand. - APInt LHSMax = ~LHSKnownZero; - APInt RHSMax = ~RHSKnownZero; + APInt LHSMax = ~LHSKnown.Zero; + APInt RHSMax = ~RHSKnown.Zero; // We know the multiply operation doesn't overflow if the maximum values for // each operand will not overflow after we multiply them together. @@ -3565,7 +3539,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // We know it always overflows if multiplying the smallest possible values for // the operands also results in overflow. 
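The ZeroBits >= BitWidth test above is a plain width argument; a worked case with illustrative numbers: for BitWidth = 32, if the LHS has at least 20 known leading zero bits it is below 2^12, and if the RHS has at least 13 it is below 2^19, so the product is below 2^31 and the unsigned multiply cannot wrap (20 + 13 = 33 >= 32).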
bool MinOverflow; - (void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow); + (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow); if (MinOverflow) return OverflowResult::AlwaysOverflows; @@ -4284,11 +4258,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred, // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - unsigned BitWidth = CA->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); + KnownBits Known(CA->getBitWidth()); + computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT); - if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) + if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) return true; } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1d3cde2f5ddb..e5aba03c8dc1 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -726,54 +726,50 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { } void ModuleBitcodeWriter::writeAttributeGroupTable() { - const std::vector &AttrGrps = VE.getAttributeGroups(); + const std::vector &AttrGrps = + VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector Record; - for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { - AttributeList AS = AttrGrps[i]; - for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { - AttributeList A = AS.getSlotAttributes(i); + for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) { + unsigned AttrListIndex = Pair.first; + AttributeSet AS = Pair.second; + Record.push_back(VE.getAttributeGroupID(Pair)); + Record.push_back(AttrListIndex); - Record.push_back(VE.getAttributeGroupID(A)); - Record.push_back(AS.getSlotIndex(i)); + for (Attribute Attr : AS) { + if (Attr.isEnumAttribute()) { + Record.push_back(0); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + } else if (Attr.isIntAttribute()) { + Record.push_back(1); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + Record.push_back(Attr.getValueAsInt()); + } else { + StringRef Kind = Attr.getKindAsString(); + StringRef Val = Attr.getValueAsString(); - for (AttributeList::iterator I = AS.begin(0), E = AS.end(0); I != E; - ++I) { - Attribute Attr = *I; - if (Attr.isEnumAttribute()) { + Record.push_back(Val.empty() ? 3 : 4); + Record.append(Kind.begin(), Kind.end()); + Record.push_back(0); + if (!Val.empty()) { + Record.append(Val.begin(), Val.end()); Record.push_back(0); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - } else if (Attr.isIntAttribute()) { - Record.push_back(1); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - Record.push_back(Attr.getValueAsInt()); - } else { - StringRef Kind = Attr.getKindAsString(); - StringRef Val = Attr.getValueAsString(); - - Record.push_back(Val.empty() ? 
3 : 4); - Record.append(Kind.begin(), Kind.end()); - Record.push_back(0); - if (!Val.empty()) { - Record.append(Val.begin(), Val.end()); - Record.push_back(0); - } } } - - Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); - Record.clear(); } + + Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); + Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeAttributeTable() { - const std::vector &Attrs = VE.getAttributes(); + const std::vector &Attrs = VE.getAttributeLists(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); @@ -782,7 +778,8 @@ void ModuleBitcodeWriter::writeAttributeTable() { for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeList &A = Attrs[i]; for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) - Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); + Record.push_back( + VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)})); Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -1270,7 +1267,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); - Vals.push_back(VE.getAttributeID(F.getAttributes())); + Vals.push_back(VE.getAttributeListID(F.getAttributes())); Vals.push_back(Log2_32(F.getAlignment())+1); Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); @@ -2616,7 +2613,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_INVOKE; - Vals.push_back(VE.getAttributeID(II->getAttributes())); + Vals.push_back(VE.getAttributeListID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); @@ -2808,7 +2805,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_CALL; - Vals.push_back(VE.getAttributeID(CI.getAttributes())); + Vals.push_back(VE.getAttributeListID(CI.getAttributes())); unsigned Flags = getOptimizationFlags(&I); Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 3800d9abd429..861150766986 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -891,19 +891,19 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { if (PAL.isEmpty()) return; // null is always 0. // Do a lookup. - unsigned &Entry = AttributeMap[PAL]; + unsigned &Entry = AttributeListMap[PAL]; if (Entry == 0) { // Never saw this before, add it. - Attribute.push_back(PAL); - Entry = Attribute.size(); + AttributeLists.push_back(PAL); + Entry = AttributeLists.size(); } // Do lookups for all attribute groups. 
for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { - AttributeList AS = PAL.getSlotAttributes(i); - unsigned &Entry = AttributeGroupMap[AS]; + IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)}; + unsigned &Entry = AttributeGroupMap[Pair]; if (Entry == 0) { - AttributeGroups.push_back(AS); + AttributeGroups.push_back(Pair); Entry = AttributeGroups.size(); } } diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h index 8a82aab29836..e7ccc8df1e5f 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -48,6 +48,10 @@ class ValueEnumerator { // For each value, we remember its Value* and occurrence frequency. typedef std::vector > ValueList; + /// Attribute groups as encoded in bitcode are almost AttributeSets, but they + /// include the AttributeList index, so we have to track that in our map. + typedef std::pair IndexAndAttrSet; + UseListOrderStack UseListOrders; private: @@ -102,13 +106,13 @@ class ValueEnumerator { bool ShouldPreserveUseListOrder; - typedef DenseMap AttributeGroupMapType; + typedef DenseMap AttributeGroupMapType; AttributeGroupMapType AttributeGroupMap; - std::vector AttributeGroups; + std::vector AttributeGroups; - typedef DenseMap AttributeMapType; - AttributeMapType AttributeMap; - std::vector Attribute; + typedef DenseMap AttributeListMapType; + AttributeListMapType AttributeListMap; + std::vector AttributeLists; /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by /// the "getGlobalBasicBlockID" method. @@ -166,16 +170,17 @@ class ValueEnumerator { unsigned getInstructionID(const Instruction *I) const; void setInstructionID(const Instruction *I); - unsigned getAttributeID(AttributeList PAL) const { + unsigned getAttributeListID(AttributeList PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeMapType::const_iterator I = AttributeMap.find(PAL); - assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); + AttributeListMapType::const_iterator I = AttributeListMap.find(PAL); + assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } - unsigned getAttributeGroupID(AttributeList PAL) const { - if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL); + unsigned getAttributeGroupID(IndexAndAttrSet Group) const { + if (!Group.second.hasAttributes()) + return 0; // Null maps to zero. + AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group); assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } @@ -206,8 +211,8 @@ class ValueEnumerator { const std::vector &getBasicBlocks() const { return BasicBlocks; } - const std::vector &getAttributes() const { return Attribute; } - const std::vector &getAttributeGroups() const { + const std::vector &getAttributeLists() const { return AttributeLists; } + const std::vector &getAttributeGroups() const { return AttributeGroups; } diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 955524c2a676..3a57772cc7f5 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -964,10 +964,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // sure to update that as well. 
const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q.second.Operand->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q.second.Operand->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h index 04f7f419f5ea..d14d93100adb 100644 --- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h @@ -60,6 +60,25 @@ class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg) MI.getOperand(0).setReg(NewReg); } + + /// Update all DBG_VALUE instructions that may be affected by the dependency + /// breaker's update of ParentMI to use NewReg. + void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI, + unsigned OldReg, unsigned NewReg) { + // The following code is dependent on the order in which the DbgValues are + // constructed in ScheduleDAGInstrs::buildSchedGraph. + MachineInstr *PrevDbgMI = nullptr; + for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) { + MachineInstr *PrevMI = DV.second; + if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) { + MachineInstr *DbgMI = DV.first; + UpdateDbgValue(*DbgMI, OldReg, NewReg); + PrevDbgMI = DbgMI; + } else if (PrevDbgMI) { + break; // If no match and already found a DBG_VALUE, we're done. + } + } + } }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 028c79f3ab6d..d99065b1b67a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -825,41 +825,25 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << Name << ":"; } OS << V->getName(); - - const DIExpression *Expr = MI->getDebugExpression(); - auto Fragment = Expr->getFragmentInfo(); - if (Fragment) - OS << " [fragment offset=" << Fragment->OffsetInBits - << " size=" << Fragment->SizeInBits << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. - bool Deref = false; bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; - for (unsigned i = 0; i < Expr->getNumElements(); ++i) { - uint64_t Op = Expr->getElement(i); - if (Op == dwarf::DW_OP_LLVM_fragment) { - // There can't be any operands after this in a valid expression - break; - } else if (Deref) { - // We currently don't support extra Offsets or derefs after the first - // one. Bail out early instead of emitting an incorrect comment. 
- OS << " [complex expression]"; - AP.OutStreamer->emitRawComment(OS.str()); - return true; - } else if (Op == dwarf::DW_OP_deref) { - Deref = true; - continue; - } - - uint64_t ExtraOffset = Expr->getElement(i++); - if (Op == dwarf::DW_OP_plus) - Offset += ExtraOffset; - else { - assert(Op == dwarf::DW_OP_minus); - Offset -= ExtraOffset; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->getNumElements()) { + OS << '['; + bool NeedSep = false; + for (auto Op : Expr->expr_ops()) { + if (NeedSep) + OS << ", "; + else + NeedSep = true; + OS << dwarf::OperationEncodingString(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + OS << ' ' << Op.getArg(I); } + OS << "] "; } // Register or immediate value. Register 0 means undef. @@ -890,7 +874,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); - Deref = true; + MemLoc = true; } if (Reg == 0) { // Suppress offset, it is not meaningful here. @@ -899,12 +883,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer->emitRawComment(OS.str()); return true; } - if (MemLoc || Deref) + if (MemLoc) OS << '['; OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } - if (MemLoc || Deref) + if (MemLoc) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. @@ -936,6 +920,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; + // If there is no "real" instruction following this CFI instruction, skip + // emitting it; it would be beyond the end of the function's FDE range. + auto *MBB = MI.getParent(); + auto I = std::next(MI.getIterator()); + while (I != MBB->end() && I->isTransient()) + ++I; + if (I == MBB->instr_end() && + MBB->getReverseIterator() == MBB->getParent()->rbegin()) + return; + const std::vector &Instrs = MF->getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; @@ -1046,15 +1040,23 @@ void AsmPrinter::EmitFunctionBody() { // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the // labels from collapsing together. Just emit a noop. - if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { + // Similarly, don't emit empty functions on Windows either. It can lead to + // duplicate entries (two functions with the same RVA) in the Guard CF Table + // after linking, causing the kernel not to load the binary: + // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html + // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer. + const Triple &TT = TM.getTargetTriple(); + if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() || + (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) { MCInst Noop; - MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); - OutStreamer->AddComment("avoids zero-length function"); + MF->getSubtarget().getInstrInfo()->getNoop(Noop); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. 
- if (Noop.getOpcode()) + if (Noop.getOpcode()) { + OutStreamer->AddComment("avoids zero-length function"); OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } } const Function *F = MF->getFunction(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 683e622e3d53..a0bf1632dff3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -144,6 +144,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + if (Dialect == InlineAsm::AD_Intel) + // We need this flag to be able to parse numbers like "0bH" + Parser->setParsingInlineAsm(true); if (MF) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 31c2b3b5e752..30bfd7c94e68 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -655,20 +655,12 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_addr: { // Get the absolute offset for this DIE within the debug info/types section. unsigned Addr = Entry->getDebugSectionOffset(); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { - const DwarfDebug *DD = AP->getDwarfDebug(); - if (DD) - assert(!DD->useSplitDwarf() && - "TODO: dwo files can't have relocations."); - const DIEUnit *Unit = Entry->getUnit(); - assert(Unit && "CUDie should belong to a CU."); - MCSection *Section = Unit->getSection(); - if (Section) { - const MCSymbol *SectionSym = Section->getBeginSymbol(); - AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); - return; - } + if (const MCSymbol *SectionSym = + Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; } + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); return; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 9a64b4b76b06..20a415150b4d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -28,7 +28,7 @@ class DwarfFile; class MCSymbol; class LexicalScope; -class DwarfCompileUnit : public DwarfUnit { +class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d72656bcc58d..6f442f5c3172 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -90,14 +90,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, clEnumVal(Disable, "Disabled")), cl::init(Default)); -static cl::opt -SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output DWARF5 split debug info."), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled")), - cl::init(Default)); - static cl::opt DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, cl::desc("Generate DWARF pubnames and pubtypes sections"), @@ -253,11 +245,8 @@ 
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasAppleExtensionAttributes = tuneForLLDB(); - // Handle split DWARF. Off by default for now. - if (SplitDwarf == Default) - HasSplitDwarf = false; - else - HasSplitDwarf = SplitDwarf == Enable; + // Handle split DWARF. + HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); // Pubnames/pubtypes on by default for GDB. if (DwarfPubSections == Default) @@ -412,7 +401,7 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { if (useSplitDwarf()) { NewCU.setSkeleton(constructSkeletonCU(NewCU)); NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); } // LTO with assembly output shares a single line table amongst multiple CUs. @@ -1885,7 +1874,7 @@ void DwarfDebug::emitDebugMacinfo() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode()->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index f65dc151f301..ccd326917bfd 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -117,8 +117,9 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // Otherwise, attempt to find a covering set of sub-register numbers. // For example, Q0 on ARM is a composition of D0+D1. unsigned CurPos = 0; - // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; + // The size of the register in bits. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg); + unsigned RegSize = TRI.getRegSizeInBits(*RC); // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. SmallBitVector Coverage(RegSize, false); @@ -198,8 +199,10 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned FragmentOffsetInBits) { auto Fragment = ExprCursor.getFragmentInfo(); - if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) + if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) { + LocationKind = Unknown; return false; + } bool HasComplexExpression = false; auto Op = ExprCursor.peek(); @@ -212,6 +215,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // operation to multiple DW_OP_pieces. 
if (HasComplexExpression && DwarfRegs.size() > 1) { DwarfRegs.clear(); + LocationKind = Unknown; return false; } @@ -233,6 +237,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return Op.getOp() == dwarf::DW_OP_stack_value; })) { DwarfRegs.clear(); + LocationKind = Unknown; return false; } @@ -343,7 +348,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, emitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_stack_value: - assert(LocationKind == Unknown || LocationKind == Implicit); LocationKind = Implicit; break; case dwarf::DW_OP_swap: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bac0c204d04f..16fb20dd7e20 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1595,3 +1595,11 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { getCU().addGlobalTypeUnitType(Ty, Context); } + +const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { + if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return nullptr; + if (isDwoUnit()) + return nullptr; + return getSection()->getBeginSymbol(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index d626ef920f95..e84df4650882 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -104,8 +104,6 @@ class RangeSpanList { bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); public: - virtual ~DwarfUnit(); - // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } @@ -289,6 +287,8 @@ class RangeSpanList { void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: + ~DwarfUnit(); + /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); @@ -335,9 +335,10 @@ class RangeSpanList { void setIndexTyDie(DIE *D) { IndexTyDie = D; } virtual bool isDwoUnit() const = 0; + const MCSymbol *getCrossSectionRelativeBaseAddress() const override; }; -class DwarfTypeUnit : public DwarfUnit { +class DwarfTypeUnit final : public DwarfUnit { uint64_t TypeSignature; const DIE *Ty; DwarfCompileUnit &CU; diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index e1eeddf0816c..b2d6652b075e 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -648,10 +648,8 @@ BreakAntiDependencies(const std::vector& SUnits, // as well. 
const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q->second->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 26454c1ef00f..cf97c635e79a 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -145,6 +145,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // Now that selection is complete, there are no more generic vregs. Verify // that the size of the now-constrained vreg is unchanged and that it has a // register class. @@ -165,7 +167,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; if (VRegToType.second.isValid() && - VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) { + VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { reportGISelFailure(MF, TPC, MORE, "gisel-select", "VReg has explicit size different from class size", *MI); diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 58778077bc0e..ef5818dabe23 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -76,6 +76,12 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { + case TargetOpcode::G_SDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SDIV_I32; + case TargetOpcode::G_UDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UDIV_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? 
RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -87,31 +93,43 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); + CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), + {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); + MI.eraseFromParent(); + return LegalizerHelper::Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = Ty.getSizeInBits(); + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: { + Type *Ty = Type::getInt32Ty(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, Ty); + } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall( - MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty}, - {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}}); - MI.eraseFromParent(); - return Legalized; + return simpleLibcall(MI, MIRBuilder, Size, Ty); } } } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index 940957d02152..83b21e637097 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -48,7 +48,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { // Verify that the Size of the register bank is big enough to cover // all the register classes it covers. 
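// Editor's note, illustrative only (not part of the patch): the new
// G_SDIV/G_UDIV cases in LegalizerHelper::libcall above lower a 32-bit
// division into a runtime call (conventionally __divsi3 / __udivsi3 from
// compiler-rt).  A minimal sketch of what such a routine computes; the
// sample_* names are hypothetical stand-ins, not the real runtime symbols.
#include <cassert>

extern "C" int sample_divsi3(int A, int B) { return A / B; }            // signed
extern "C" unsigned sample_udivsi3(unsigned A, unsigned B) { return A / B; }

int main() {
  assert(sample_divsi3(-7, 2) == -3);   // C division truncates toward zero
  assert(sample_udivsi3(7u, 2u) == 3u);
  return 0;
}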
- assert((getSize() >= SubRC.getSize() * 8) && + assert(getSize() >= TRI.getRegSizeInBits(SubRC) && "Size is not big enough for all the subclasses!"); assert(covers(SubRC) && "Not all subclasses are covered"); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index b2df2f159676..d5ae9a6776a4 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -421,7 +421,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, RC = MRI.getRegClass(Reg); } assert(RC && "Unable to deduce the register class"); - return RC->getSize() * 8; + return TRI.getRegSizeInBits(*RC); } //------------------------------------------------------------------------------ diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index 6da174a53666..b6624b88fe23 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -925,9 +925,6 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, << CmpInst::getPredicateName(Pred) << ')'; break; } - case MachineOperand::MO_Placeholder: - OS << ""; - break; } } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 4bd5fbfe38e6..1faf6292a9c1 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -287,8 +287,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); - case MachineOperand::MO_Placeholder: - return true; } llvm_unreachable("Invalid machine operand type"); } @@ -337,8 +335,6 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); - case MachineOperand::MO_Placeholder: - return hash_combine(); } llvm_unreachable("Invalid machine operand type"); } @@ -515,9 +511,6 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << '>'; break; } - case MachineOperand::MO_Placeholder: - OS << ""; - break; } if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index b3d18435985e..7eb991744f01 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -330,7 +330,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { /// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { // If we lost memory operands, conservatively assume that the instruction - // writes to all slots. + // writes to all slots. if (MI->memoperands_empty()) return true; for (const MachineMemOperand *MemOp : MI->memoperands()) { @@ -708,7 +708,7 @@ void MachineLICM::SinkIntoLoop() { for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. - // As such, it must not have side-effects, e.g. such as a call has. + // As such, it must not have side-effects, e.g. such as a call has. 
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) Candidates.push_back(&*I); } @@ -837,9 +837,9 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); - + // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. + // reads from everything.. if (MI.memoperands_empty()) return true; @@ -1337,7 +1337,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); // Since we are moving the instruction out of its basic block, we do not - // retain its debug location. Doing so would degrade the debugging + // retain its debug location. Doing so would degrade the debugging // experience and adversely affect the accuracy of profiling information. MI->setDebugLoc(DebugLoc()); diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 1354009794cb..570a0cd0ba90 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -373,22 +373,22 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, FixedSlot->Reg != Reg) ++FixedSlot; + unsigned Size = RegInfo->getSpillSize(*RC); if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); + unsigned Align = RegInfo->getSpillAlignment(*RC); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI.CreateStackObject(Size, Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = - MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 283d84629f8e..c606b7b83310 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -212,8 +212,9 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 6392136fa290..35db30f89976 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -395,8 +395,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. 
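// Editor's note, illustrative only (not part of the patch): this and several
// neighbouring hunks migrate size/alignment queries from TargetRegisterClass
// to TargetRegisterInfo.  Roughly, given TargetRegisterInfo &TRI and
// const TargetRegisterClass *RC:
//   RC->getSize()       ->  TRI.getSpillSize(*RC)        // spill size, bytes
//   RC->getAlignment()  ->  TRI.getSpillAlignment(*RC)   // spill alignment, bytes
//   RC->getSize() * 8   ->  TRI.getRegSizeInBits(*RC)    // register size, bits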
const MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned NeedSize = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + unsigned NeedSize = TRI->getSpillSize(*RC); + unsigned NeedAlign = TRI->getSpillAlignment(*RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4702d63cb617..1251ae6262b8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3878,27 +3878,29 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + ConstantSDNode *N1C = dyn_cast(N.getOperand(1)); if (!N1C) return false; - unsigned Num; + unsigned MaskByteOffset; switch (N1C->getZExtValue()) { default: return false; - case 0xFF: Num = 0; break; - case 0xFF00: Num = 1; break; - case 0xFF0000: Num = 2; break; - case 0xFF000000: Num = 3; break; + case 0xFF: MaskByteOffset = 0; break; + case 0xFF00: MaskByteOffset = 1; break; + case 0xFF0000: MaskByteOffset = 2; break; + case 0xFF000000: MaskByteOffset = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). - SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { - if (Num == 0 || Num == 2) { + if (MaskByteOffset == 0 || MaskByteOffset == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 - if (N0.getOpcode() != ISD::SRL) + if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3906,7 +3908,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 - if (N0.getOpcode() != ISD::SHL) + if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3915,7 +3917,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 - if (Num != 0 && Num != 2) + if (MaskByteOffset != 0 && MaskByteOffset != 2) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3923,17 +3925,17 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 - if (Num != 1 && Num != 3) + if (MaskByteOffset != 1 && MaskByteOffset != 3) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } - if (Parts[Num]) + if (Parts[MaskByteOffset]) return false; - Parts[Num] = N0.getOperand(0).getNode(); + Parts[MaskByteOffset] = N0.getOperand(0).getNode(); return true; } @@ -4198,20 +4200,22 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // reassociate or if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0. 
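// Editor's note, illustrative only (not part of the patch): a brute-force
// check of the identity behind this canonicalization, with concrete 8-bit
// constants chosen so that c1 & c2 != 0 (the profitability test the rewritten
// code expresses with APInt::intersects).
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 0x0F, C2 = 0x3C;                 // C1 & C2 == 0x0C != 0
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t Lhs = (uint8_t)((X & C1) | C2);           // (or (and X, c1), c2)
    uint8_t Rhs = (uint8_t)((X | C2) & (C1 | C2));    // (and (or X, c2), c1|c2)
    assert(Lhs == Rhs && "canonicalization must preserve the value");
  }
  return 0;
}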
- if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) { - ConstantSDNode *C1 = cast(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, - N1C, C1)) - return DAG.getNode( - ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); - return SDValue(); + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { + if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) { + if (SDValue COR = + DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); + } } } + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) @@ -5611,24 +5615,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) if (N1C && N0.getOpcode() == ISD::TRUNCATE && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - // This is only valid if the OpSizeInBits + c1 = size of inner shift. - if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); - if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + N0.getOperand(0).getOpcode() == ISD::SRL) { + if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + uint64_t c1 = N001C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, + N0.getOperand(0).getOperand(0), + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); + } } } @@ -11641,7 +11645,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // Check if this is a trunc(lshr). if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa(User->getOperand(1))) { - Shift = cast(User->getOperand(1))->getZExtValue(); + Shift = User->getConstantOperandVal(1); User = *User->use_begin(); } @@ -13120,8 +13124,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // do this only if indices are both constants and Idx1 < Idx0. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() && isa(InVec.getOperand(2))) { - unsigned OtherElt = - cast(InVec.getOperand(2))->getZExtValue(); + unsigned OtherElt = InVec.getConstantOperandVal(2); if (Elt < OtherElt) { // Swap nodes. 
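// Editor's note, illustrative only (not part of the patch): a concrete
// instance of the DAGCombiner::visitSRL fold rewritten above,
// (srl (trunc (srl x, c1)), c2) -> (trunc (srl x, c1 + c2)), guarded by
// c1 + OpSizeInBits == InnerShiftSize.  Here x is i64, the truncation is to
// i32, c1 = 32 and c2 = 8, so the guard 32 + 32 == 64 holds.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1122334455667788ULL;
  uint32_t Lhs = (uint32_t)(X >> 32) >> 8;   // srl, trunc, srl
  uint32_t Rhs = (uint32_t)(X >> (32 + 8));  // trunc (srl x, c1 + c2)
  assert(Lhs == Rhs);
  return 0;
}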
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, @@ -14065,7 +14068,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { if (!isa(Op.getOperand(1))) return SDValue(); - int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); + int ExtIdx = Op.getConstantOperandVal(1); // Ensure that we are extracting a subvector from a vector the same // size as the result. @@ -15049,7 +15052,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && N1.getValueType() == N0.getOperand(1).getValueType() && isa(N0.getOperand(2))) { - unsigned OtherIdx = cast(N0.getOperand(2))->getZExtValue(); + unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, @@ -16088,6 +16091,19 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (Op1->isInvariant() && Op0->writeMem()) return false; + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + + // Check for BaseIndexOffset matching. + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); + if (BasePtr0.equalBaseIndex(BasePtr1)) + return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) || + (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset)); + + // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis + // modified to use BaseIndexOffset. + // Gather base node and offset information. SDValue Base0, Base1; int64_t Offset0, Offset1; @@ -16099,8 +16115,6 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { Base1, Offset1, GV1, CV1); // If they have the same base address, then check to see if they overlap. - unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; - unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) return !((Offset0 + NumBytes0) <= Offset1 || (Offset1 + NumBytes1) <= Offset0); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e85d1951e3ae..b235e19aaab2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -161,7 +161,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (VRBase) { DstRC = MRI->getRegClass(VRBase); } else if (UseRC) { - assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + assert(TRI->isTypeLegalForClass(*UseRC, VT) && + "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3bae3bf9ab7c..fdebb8bd00db 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3497,11 +3497,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. 
unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getNode(ISD::SRA, dl, VT, RHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 85068e890756..9ed70c9b4db9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3251,7 +3251,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } @@ -3294,7 +3294,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3342,7 +3342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { @@ -3445,5 +3445,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); + return DAG.getBuildVector(N->getValueType(0), dl, NewOps); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c02b8960b36c..aa69e0e2adfc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -362,8 +362,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - makeArrayRef(Ops.data(), NumElts)); + SDValue Vec = + DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -396,10 +396,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { NewElts.push_back(Hi); } - SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*DAG.getContext(), - NewVT, NewElts.size()), - NewElts); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()); + SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts); // Convert the new vector to the old vector type. 
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -458,7 +456,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1a7d7b7af5fa..4a3160297d64 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -512,7 +512,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); + return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -523,16 +523,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } -/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, -/// so just return the element, ignoring the index. -SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - SDValue Res = GetScalarizedVector(N->getOperand(0)); - if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), - Res); - return Res; -} - +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>, +/// so just return the element, ignoring the index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != VT) + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); + return Res; +} + /// If the input condition is a vector that needs to be scalarized, it must be /// <1 x i1>, so just convert to a normal ISD::SELECT @@ -2631,7 +2631,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (InVT.isVector()) NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + NewVec = DAG.getBuildVector(NewInVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e923e30e5037..69b76fbe57d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1320,6 +1320,18 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (MCID.hasOptionalDef()) { + // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. + // This operand can be either a def of CPSR, if the S bit is set; or a use + // of %noreg. When the OptionalDef is set to a valid register, we need to + // handle it in the same way as an ImplicitDef. 
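// Editor's note, illustrative only (not part of the patch): on ARM, for
// example, "adds r0, r0, r1" sets the S bit and therefore defines CPSR through
// this optional-def operand, while plain "add r0, r0, r1" leaves it as a use
// of %noreg; only the former needs the live-register bookkeeping added below.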
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i) + if (MCID.OpInfo[i].isOptionalDef()) { + const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); + unsigned Reg = cast(OptionalDef)->getReg(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } if (!MCID.ImplicitDefs) continue; for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 523f409e6b2c..439f67f1e155 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" @@ -2868,7 +2869,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined. if (Val.getOpcode() == ISD::SHL) { - auto *C = dyn_cast(Val.getOperand(0)); + auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue() == 1) return true; } @@ -2876,7 +2877,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Similarly, a logical right-shift of a constant sign-bit will have exactly // one bit set. if (Val.getOpcode() == ISD::SRL) { - auto *C = dyn_cast(Val.getOperand(0)); + auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue().isSignMask()) return true; } @@ -7539,10 +7540,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast(GV), KnownZero, KnownOne, + KnownBits Known(PtrWidth); + llvm::computeKnownBits(const_cast(GV), Known, getDataLayout()); - unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned AlignBits = Known.Zero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) return MinAlign(Align, GVOffset); @@ -7629,52 +7630,52 @@ Type *ConstantPoolSDNode::getType() const { return Val.ConstVal->getType(); } -bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, - APInt &SplatUndef, +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) const { + bool IsBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); - unsigned sz = VT.getSizeInBits(); - if (MinSplatBits > sz) + unsigned VecWidth = VT.getSizeInBits(); + if (MinSplatBits > VecWidth) return false; - SplatValue = APInt(sz, 0); - SplatUndef = APInt(sz, 0); + // FIXME: The widths are based on this node's type, but build vectors can + // truncate their operands. + SplatValue = APInt(VecWidth, 0); + SplatUndef = APInt(VecWidth, 0); - // Get the bits. Bits with undefined values (when the corresponding element + // Get the bits. Bits with undefined values (when the corresponding element // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared - // in SplatValue. If any of the values are not constant, give up and return + // in SplatValue. 
If any of the values are not constant, give up and return // false. - unsigned int nOps = getNumOperands(); - assert(nOps > 0 && "isConstantSplat has 0-size build vector"); - unsigned EltBitSize = VT.getScalarSizeInBits(); + unsigned int NumOps = getNumOperands(); + assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltWidth = VT.getScalarSizeInBits(); - for (unsigned j = 0; j < nOps; ++j) { - unsigned i = isBigEndian ? nOps-1-j : j; + for (unsigned j = 0; j < NumOps; ++j) { + unsigned i = IsBigEndian ? NumOps - 1 - j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = j * EltBitSize; + unsigned BitPos = j * EltWidth; if (OpVal.isUndef()) - SplatUndef.setBits(BitPos, BitPos + EltBitSize); - else if (ConstantSDNode *CN = dyn_cast(OpVal)) - SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), - BitPos); - else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) + SplatUndef.setBits(BitPos, BitPos + EltWidth); + else if (auto *CN = dyn_cast(OpVal)) + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + else if (auto *CN = dyn_cast(OpVal)) SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); else return false; } - // The build_vector is all constants or undefs. Find the smallest element + // The build_vector is all constants or undefs. Find the smallest element // size that splats the vector. - HasAnyUndefs = (SplatUndef != 0); - while (sz > 8) { - unsigned HalfSize = sz / 2; + // FIXME: This does not work for vectors with elements less than 8 bits. + while (VecWidth > 8) { + unsigned HalfSize = VecWidth / 2; APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); APInt LowValue = SplatValue.trunc(HalfSize); APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); @@ -7688,10 +7689,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - sz = HalfSize; + VecWidth = HalfSize; } - SplatBitSize = sz; + SplatBitSize = VecWidth; return true; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c58953ee908..6a737ed84ea4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -362,11 +362,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, return DAG.getUNDEF(ValueVT); } - if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); + return DAG.getBuildVector(ValueVT, DL, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, @@ -537,7 +537,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + Val = DAG.getBuildVector(PartVT, DL, Ops); // FIXME: Use CONCAT for 2x -> 4x. 
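// Editor's note, illustrative only (not part of the patch): the splat-size
// search in BuildVectorSDNode::isConstantSplat above keeps halving the width
// while the two halves agree.  A plain-integer sketch of that loop, assuming
// a 32-bit vector whose 8-bit pattern repeats:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Splat = 0x55555555u;        // e.g. <4 x i8> with every element 0x55
  unsigned Width = 32;
  while (Width > 8) {
    unsigned Half = Width / 2;
    uint32_t Mask = (1u << Half) - 1;  // Half is at most 16 here
    uint32_t Lo = Splat & Mask;
    uint32_t Hi = (Splat >> Half) & Mask;
    if (Lo != Hi)
      break;                           // halves differ: stop shrinking
    Splat = Lo;
    Width = Half;
  }
  assert(Width == 8 && Splat == 0x55); // smallest repeating element is 8 bits
  return 0;
}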
@@ -1088,8 +1088,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1141,7 +1140,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -3147,7 +3146,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3969,9 +3968,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4896,11 +4895,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - + Entry.Ty = I.getArgOperand(2)->getType(); Entry.Node = NumElements; Args.push_back(Entry); - + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); Entry.Node = ElementSize; Args.push_back(Entry); @@ -5183,7 +5182,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, @@ -5743,7 +5742,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fadd: Opcode = ISD::STRICT_FADD; break; case Intrinsic::experimental_constrained_fsub: @@ -6653,12 +6652,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair PhysReg = - TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), - OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; @@ -6666,12 +6665,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. 
- if (OpInfo.Type == InlineAsm::isInput && - PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - MVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -6699,12 +6698,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = *TRI.legalclasstypes_begin(*RC); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); @@ -6730,7 +6729,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 93c6738f650d..136dec873cb8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -342,11 +342,16 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( - SDValue Op, const APInt &Demanded) { +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + // Do target-specific constant optimization. + if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + return TLO.New.getNode(); + // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Opcode) { default: @@ -367,7 +372,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); - return CombineTo(Op, NewOp); + return TLO.CombineTo(Op, NewOp); } break; @@ -380,15 +385,17 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. 
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - const SDLoc &dl) { +bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + SelectionDAG &DAG = TLO.DAG; + SDLoc dl(Op); + // Early return, as this function cannot handle vector types. if (Op.getValueType().isVector()) return false; @@ -418,23 +425,22 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); - return CombineTo(Op, Z); + return TLO.CombineTo(Op, Z); } } return false; } bool -TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, - unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI, + TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - *this, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + TLO, 0, true)) return false; @@ -446,9 +452,9 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // with the value 'x', which will give us: // Old = i32 and x, 0xffffff // New = x - if (Old.hasOneUse()) { + if (TLO.Old.hasOneUse()) { // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(*this); + DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -456,17 +462,17 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that // it will attempt to combine will be opt. - assert(Old == Op); + assert(TLO.Old == Op); SmallVector NewOps; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (i == OpIdx) { - NewOps.push_back(New); + NewOps.push_back(TLO.New); continue; } NewOps.push_back(User->getOperand(i)); } - DAG.UpdateNodeOperands(User, NewOps); + TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -585,7 +591,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -620,10 +626,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & (KnownZero|KnownZero2)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. 
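// Editor's note, illustrative only (not part of the patch): what the
// ShrinkDemandedConstant calls rewritten in these hunks do for an AND when
// only some result bits are demanded: mask bits outside the demanded set
// cannot affect any demanded bit, so the constant can be narrowed.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Demanded = 0x00FF;          // only the low byte is used
  for (uint32_t X = 0; X < 0x10000; ++X) {
    uint32_t Old = X & 0xFFFF;               // original (and x, 0xFFFF)
    uint32_t New = X & (0xFFFF & Demanded);  // shrunk    (and x, 0x00FF)
    assert((Old & Demanded) == (New & Demanded));
  }
  return 0;
}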
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -654,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -682,7 +688,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -727,7 +733,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // If it already has all the bits set, nothing to change // but don't shrink either! - } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) { return true; } } @@ -746,7 +752,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -764,7 +770,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1284,7 +1290,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, KnownOne2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. - TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { const SDNodeFlags *Flags = Op.getNode()->getFlags(); if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we @@ -1358,31 +1364,38 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must +// work with truncating build vectors and vectors with elements of less than +// 8 bits. 
bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - const ConstantSDNode *CN = dyn_cast(N); - if (!CN) { - const BuildVectorSDNode *BV = dyn_cast(N); - if (!BV) - return false; - - // Only interested in constant splats, we don't care about undef - // elements in identifying boolean constants and getConstantSplatNode - // returns NULL if all ops are undef; - CN = BV->getConstantSplatNode(); + APInt CVal; + if (auto *CN = dyn_cast(N)) { + CVal = CN->getAPIntValue(); + } else if (auto *BV = dyn_cast(N)) { + auto *CN = BV->getConstantSplatNode(); if (!CN) return false; + + // If this is a truncating build vector, truncate the splat value. + // Otherwise, we may fail to match the expected values below. + unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); + CVal = CN->getAPIntValue(); + if (BVEltWidth < CVal.getBitWidth()) + CVal = CVal.trunc(BVEltWidth); + } else { + return false; } switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: - return CN->getAPIntValue()[0]; + return CVal[0]; case ZeroOrOneBooleanContent: - return CN->isOne(); + return CVal == 1; case ZeroOrNegativeOneBooleanContent: - return CN->isAllOnesValue(); + return CVal.isAllOnesValue(); } llvm_unreachable("Invalid boolean contents"); @@ -2535,7 +2548,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (const TargetRegisterClass *RC : RI->regclasses()) { // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. - if (!isLegalRC(RC)) + if (!isLegalRC(*RI, *RC)) continue; for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); @@ -2547,9 +2560,9 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, // If this register class has the requested value type, return it, // otherwise keep searching and return the first class found // if no other is found which explicitly has the requested type. 
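// Editor's note, illustrative only (not part of the patch): why the splat
// value is truncated to the build_vector element width in the rewritten
// isConstTrueVal above.  A boolean vector such as <4 x i1> may be built from
// wider constants (e.g. i32 -1); only the low bit of each element survives,
// so the comparison against "true" must happen after truncation.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t CVal = 0xFFFFFFFFu;                       // splat written as i32 -1
  unsigned BVEltWidth = 1;                           // <N x i1> element width
  uint64_t Trunc = CVal & ((1u << BVEltWidth) - 1);  // keep only the low bit
  assert(Trunc == 1);                                // still reads as "true"
  return 0;
}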
- if (RC->hasType(VT)) + if (RI->isTypeLegalForClass(*RC, VT)) return S; - else if (!R.second) + if (!R.second) R = S; } } diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index 1a8ec5bff322..315b059c5ac9 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -161,7 +161,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); - Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); + Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC), + DwarfRegNum, Offset); return ++MOI; } @@ -245,7 +246,7 @@ void StackMaps::print(raw_ostream &OS) { StackMaps::LiveOutReg StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg)); return LiveOutReg(Reg, DwarfRegNum, Size); } diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 711144a34743..14c5adc0d898 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -345,12 +345,12 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!SubIdx) { - Size = RC->getSize(); + Size = TRI->getSpillSize(*RC); Offset = 0; return true; } - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). @@ -364,10 +364,10 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, Size = BitSize /= 8; Offset = (unsigned)BitOffset / 8; - assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); if (!MF.getDataLayout().isLittleEndian()) { - Offset = RC->getSize() - (Offset + Size); + Offset = TRI->getSpillSize(*RC) - (Offset + Size); } return true; } @@ -428,8 +428,8 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, return nullptr; } -void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - llvm_unreachable("Not a MachO target"); +void TargetInstrInfo::getNoop(MCInst &NopInst) const { + llvm_unreachable("Not implemented"); } static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 27630a3055cb..e579922bb69e 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1184,12 +1184,11 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, /// isLegalRC - Return true if the value types that can be represented by the /// specified register class are all legal. 
-bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { +bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const { + for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) if (isTypeLegal(*I)) return true; - } return false; } @@ -1299,9 +1298,9 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { const TargetRegisterClass *SuperRC = TRI->getRegClass(i); // We want the largest possible spill size. - if (SuperRC->getSize() <= BestRC->getSize()) + if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC)) continue; - if (!isLegalRC(SuperRC)) + if (!isLegalRC(*TRI, *SuperRC)) continue; BestRC = SuperRC; } diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 66cdad278e8d..f6e4c17d514c 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -156,8 +156,8 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { // this physreg. const TargetRegisterClass* BestRC = nullptr; for (const TargetRegisterClass* RC : regclasses()) { - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || BestRC->hasSubClass(RC))) + if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) && + RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } @@ -207,7 +207,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A, if (unsigned Common = *A++ & *B++) { const TargetRegisterClass *RC = TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) return RC; } return nullptr; @@ -265,7 +265,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *BestRC = nullptr; unsigned *BestPreA = &PreA; unsigned *BestPreB = &PreB; - if (RCA->getSize() < RCB->getSize()) { + if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) { std::swap(RCA, RCB); std::swap(SubA, SubB); std::swap(BestPreA, BestPreB); @@ -273,7 +273,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Also terminate the search one we have found a register class as small as // RCA. - unsigned MinSize = RCA->getSize(); + unsigned MinSize = getRegSizeInBits(*RCA); for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); @@ -281,7 +281,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Check if a common super-register class exists for this index pair. const TargetRegisterClass *RC = firstCommonClass(IA.getMask(), IB.getMask(), this); - if (!RC || RC->getSize() < MinSize) + if (!RC || getRegSizeInBits(*RC) < MinSize) continue; // The indexes must compose identically: PreA+SubA == PreB+SubB. @@ -290,7 +290,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, continue; // Is RC a better candidate than BestRC? - if (BestRC && RC->getSize() >= BestRC->getSize()) + if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC)) continue; // Yes, RC is the smallest super-register seen so far. 
@@ -299,7 +299,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, *BestPreB = IB.getSubReg(); // Bail early if we reached MinSize. We won't find a better candidate. - if (BestRC->getSize() == MinSize) + if (getRegSizeInBits(*BestRC) == MinSize) return BestRC; } } diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index c8946010e9d1..d10ca1a7ff91 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -73,8 +73,9 @@ void VirtRegMap::grow() { } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { - int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align); ++NumSpillSlots; return SS; } diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 85e1eaedfc61..a12f8adfafe5 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -112,10 +113,8 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = AccelSection.getU32(&DataOffset); - RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); - if (Reloc != Relocs.end()) - StringOffset += Reloc->second.second; + unsigned StringOffset = + getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index bbb19b5e998d..7e8d04672c03 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -56,6 +56,16 @@ typedef DWARFDebugLine::LineTable DWARFLineTable; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs) { + if (!Relocs) + return Data.getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = Relocs->find(*Off); + if (AI == Relocs->end()) + return Data.getUnsigned(Off, Size); + return Data.getUnsigned(Off, Size) + AI->second.second; +} + static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { @@ -212,11 +222,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, // sizes, but for simplicity we just use the address byte size of the last // compile unit (there is no easy and fast way to associate address range // list and the compile unit it describes). 
- DataExtractor rangesData(getRangeSection(), isLittleEndian(), + DataExtractor rangesData(getRangeSection().Data, isLittleEndian(), savedAddressByteSize); offset = 0; DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset)) + while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs)) rangeList.dump(OS); } @@ -722,7 +732,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, *SectionData = data; if (name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection = data; + RangeDWOSection.Data = data; } } else if (name == "debug_types") { // Find debug_types data by section rather than name as there are @@ -763,6 +773,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, .Case("debug_loc", &LocSection.Relocs) .Case("debug_info.dwo", &InfoDWOSection.Relocs) .Case("debug_line", &LineSection.Relocs) + .Case("debug_ranges", &RangeSection.Relocs) .Case("apple_names", &AppleNamesSection.Relocs) .Case("apple_types", &AppleTypesSection.Relocs) .Case("apple_namespaces", &AppleNamespacesSection.Relocs) @@ -845,7 +856,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_ranges", &RangeSection) + .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index e4670519b797..ff6ed9c6741d 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -302,16 +303,9 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - { - // If this address is in our relocation map, apply the relocation. 
- RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); - if (AI != RMap->end()) { - const std::pair &R = AI->second; - State.Row.Address = - debug_line_data.getAddress(offset_ptr) + R.second; - } else - State.Row.Address = debug_line_data.getAddress(offset_ptr); - } + State.Row.Address = + getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(), + offset_ptr, RMap); break; case DW_LNE_define_file: diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index e2799ab2d243..d5c34216ed53 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -48,18 +49,10 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { // 2.6.2 Location Lists // A location list entry consists of: while (true) { + // A beginning and ending address offsets. Entry E; - RelocAddrMap::const_iterator AI = RelocMap.find(Offset); - // 1. A beginning address offset. ... - E.Begin = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.Begin += AI->second.second; - - AI = RelocMap.find(Offset); - // 2. An ending address offset. ... - E.End = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.End += AI->second.second; + E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index f1d82fda8c06..9380fe8fe85d 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -22,7 +23,8 @@ void DWARFDebugRangeList::clear() { Entries.clear(); } -bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { +bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, + const RelocAddrMap &Relocs) { clear(); if (!data.isValidOffset(*offset_ptr)) return false; @@ -33,8 +35,11 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { while (true) { RangeListEntry entry; uint32_t prev_offset = *offset_ptr; - entry.StartAddress = data.getAddress(offset_ptr); - entry.EndAddress = data.getAddress(offset_ptr); + entry.StartAddress = + getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + entry.EndAddress = + getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + // Check that both values were extracted correctly. 
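Every DWARF hunk in this change funnels relocation handling through the llvm::getRelocatedValue helper defined in DWARFContext.cpp above, instead of open-coding the RelocAddrMap lookup at each call site. A minimal sketch of a caller, under the assumption that a DataExtractor and a RelocAddrMap are already available; the function name is invented for illustration.

    static uint64_t readRelocatedAddressSketch(const DataExtractor &Data,
                                               uint32_t AddrSize,
                                               const RelocAddrMap &Relocs) {
      uint32_t Offset = 0;
      // Reads AddrSize bytes at Offset (advancing it); if Relocs has an entry
      // keyed by the offset before the read, its addend (.second.second) is
      // added to the extracted value. Passing nullptr instead of &Relocs
      // skips relocation handling entirely, as some call sites in this patch
      // do when no unit is available.
      return getRelocatedValue(Data, AddrSize, &Offset, &Relocs);
    }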
if (*offset_ptr != prev_offset + 2 * AddressSize) { clear(); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 6de57b999adc..28592e4dfb65 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -334,11 +334,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, (Form == DW_FORM_addr) ? U->getAddressByteSize() : U->getRefAddrByteSize(); - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - if (AI != U->getRelocMap()->end()) { - Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second; - } else - Value.uval = data.getUnsigned(offset_ptr, AddrSize); + Value.uval = + getRelocatedValue(data, AddrSize, offset_ptr, U->getRelocMap()); break; } case DW_FORM_exprloc: @@ -376,12 +373,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: { - Value.uval = data.getU32(offset_ptr); - if (!U) - break; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + const RelocAddrMap* RelocMap = U ? U->getRelocMap() : nullptr; + Value.uval = getRelocatedValue(data, 4, offset_ptr, RelocMap); break; } case DW_FORM_data8: @@ -411,11 +404,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, case DW_FORM_strp_sup: { if (!U) return false; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - uint8_t Size = U->getDwarfOffsetByteSize(); - Value.uval = data.getUnsigned(offset_ptr, Size); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + Value.uval = getRelocatedValue(data, U->getDwarfOffsetByteSize(), + offset_ptr, U->getRelocMap()); break; } case DW_FORM_flag_present: diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index c3f467745402..f50487fc3ba3 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -32,7 +32,7 @@ using namespace llvm; using namespace dwarf; void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { - parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(), + parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(), C.getStringSection(), StringRef(), C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(), false); } @@ -40,16 +40,17 @@ void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { void DWARFUnitSectionBase::parseDWO(DWARFContext &C, const DWARFSection &DWOSection, DWARFUnitIndex *Index) { - parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(), + parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(), C.getStringDWOSection(), C.getStringOffsetDWOSection(), C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(), true); } DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, StringRef SOS, StringRef AOS, StringRef LS, + bool LE, bool IsDWO, + const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry) : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), LineSection(LS), 
StringSection(SS), StringOffsetSection([&]() { @@ -142,9 +143,10 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. assert(!DieArray.empty()); - DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize); + DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize); uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; - return RangeList.extract(RangesData, &ActualRangeListOffset); + return RangeList.extract(RangesData, &ActualRangeListOffset, + RangeSection->Relocs); } void DWARFUnit::clear() { diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 5e8c0bdc171d..4e2474c51cb1 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" @@ -720,7 +721,7 @@ uint32_t DIARawSymbol::getVirtualTableShapeId() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId); } -std::unique_ptr +std::unique_ptr DIARawSymbol::getVirtualBaseTableType() const { CComPtr TableType; if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType) @@ -729,7 +730,7 @@ DIARawSymbol::getVirtualBaseTableType() const { auto RawVT = llvm::make_unique(Session, TableType); auto Pointer = llvm::make_unique(Session, std::move(RawVT)); - return unique_dyn_cast(Pointer->getPointeeType()); + return unique_dyn_cast(Pointer->getPointeeType()); } PDB_DataKind DIARawSymbol::getDataKind() const { diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp index 6ecf335812b5..ef47b92b4f2f 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -21,12 +21,22 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::pdb; -static Error ErrorFromHResult(HRESULT Result, StringRef Context) { +template +static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... 
Args) { + SmallString<64> MessageStorage; + StringRef Context; + if (sizeof...(Args) > 0) { + MessageStorage = formatv(Str, std::forward(Args)...).str(); + Context = MessageStorage; + } else + Context = Str; + switch (Result) { case E_PDB_NOT_FOUND: return make_error(generic_error_code::invalid_path, Context); @@ -95,8 +105,9 @@ Error DIASession::createFromPdb(StringRef Path, const wchar_t *Path16Str = reinterpret_cast(Path16.data()); HRESULT HR; - if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) - return ErrorFromHResult(HR, "Calling loadDataFromPdb"); + if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) { + return ErrorFromHResult(HR, "Calling loadDataFromPdb {0}", Path); + } if (FAILED(HR = DiaDataSource->openSession(&DiaSession))) return ErrorFromHResult(HR, "Calling openSession"); diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp index 08798cf0ed28..e87e2c407593 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/ModStream.cpp @@ -82,4 +82,8 @@ ModStream::lines(bool *HadError) const { return make_range(LineInfo.begin(HadError), LineInfo.end()); } +bool ModStream::hasLineInfo() const { + return C13LinesSubstream.getLength() > 0 || LinesSubstream.getLength() > 0; +} + Error ModStream::commit() { return Error::success(); } diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 3aba35adb53f..70968d4330b0 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/Support/ConvertUTF.h" @@ -320,7 +321,7 @@ uint32_t NativeRawSymbol::getVirtualTableShapeId() const { return 0; } -std::unique_ptr +std::unique_ptr NativeRawSymbol::getVirtualBaseTableType() const { return nullptr; } diff --git a/contrib/llvm/lib/DebugInfo/PDB/UDTLayout.cpp b/contrib/llvm/lib/DebugInfo/PDB/UDTLayout.cpp index 61cef093d4ce..aacefae80c3a 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/UDTLayout.cpp @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" @@ -39,36 +40,47 @@ static uint32_t getTypeLength(const PDBSymbol &Symbol) { return RawType.getLength(); } -StorageItemBase::StorageItemBase(const UDTLayoutBase &Parent, - const PDBSymbol &Symbol, - const std::string &Name, - uint32_t OffsetInParent, uint32_t Size) - : Parent(Parent), Symbol(Symbol), Name(Name), - OffsetInParent(OffsetInParent), SizeOf(Size) { +LayoutItemBase::LayoutItemBase(const UDTLayoutBase *Parent, + const PDBSymbol *Symbol, const std::string &Name, + uint32_t OffsetInParent, uint32_t Size, + bool IsElided) + : Symbol(Symbol), Parent(Parent), Name(Name), + OffsetInParent(OffsetInParent), SizeOf(Size), LayoutSize(Size), + IsElided(IsElided) { 
UsedBytes.resize(SizeOf, true); } -uint32_t StorageItemBase::deepPaddingSize() const { - // sizeof(Field) - sizeof(typeof(Field)) is trailing padding. - return SizeOf - getTypeLength(Symbol); +uint32_t LayoutItemBase::deepPaddingSize() const { + return UsedBytes.size() - UsedBytes.count(); +} + +uint32_t LayoutItemBase::tailPadding() const { + int Last = UsedBytes.find_last(); + + return UsedBytes.size() - (Last + 1); } DataMemberLayoutItem::DataMemberLayoutItem( - const UDTLayoutBase &Parent, std::unique_ptr DataMember) - : StorageItemBase(Parent, *DataMember, DataMember->getName(), - DataMember->getOffset(), getTypeLength(*DataMember)), - DataMember(std::move(DataMember)) { - auto Type = this->DataMember->getType(); + const UDTLayoutBase &Parent, std::unique_ptr Member) + : LayoutItemBase(&Parent, Member.get(), Member->getName(), + Member->getOffset(), getTypeLength(*Member), false), + DataMember(std::move(Member)) { + auto Type = DataMember->getType(); if (auto UDT = unique_dyn_cast(Type)) { - // UDT data members might have padding in between fields, but otherwise - // a member should occupy its entire storage. - UsedBytes.resize(SizeOf, false); UdtLayout = llvm::make_unique(std::move(UDT)); + UsedBytes = UdtLayout->usedBytes(); } } +VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr Sym, + uint32_t Offset, uint32_t Size) + : LayoutItemBase(&Parent, Sym.get(), "", Offset, Size, false), + Type(std::move(Sym)) { +} + const PDBSymbolData &DataMemberLayoutItem::getDataMember() { - return *dyn_cast(&Symbol); + return *dyn_cast(Symbol); } bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; } @@ -77,60 +89,73 @@ const ClassLayout &DataMemberLayoutItem::getUDTLayout() const { return *UdtLayout; } -uint32_t DataMemberLayoutItem::deepPaddingSize() const { - uint32_t Result = StorageItemBase::deepPaddingSize(); - if (UdtLayout) - Result += UdtLayout->deepPaddingSize(); - return Result; -} - VTableLayoutItem::VTableLayoutItem(const UDTLayoutBase &Parent, - std::unique_ptr VTable) - : StorageItemBase(Parent, *VTable, "", 0, getTypeLength(*VTable)), - VTable(std::move(VTable)) { - auto VTableType = cast(this->VTable->getType()); + std::unique_ptr VT) + : LayoutItemBase(&Parent, VT.get(), "", 0, getTypeLength(*VT), false), + VTable(std::move(VT)) { + auto VTableType = cast(VTable->getType()); ElementSize = VTableType->getLength(); - - Shape = - unique_dyn_cast(VTableType->getPointeeType()); - if (Shape) - VTableFuncs.resize(Shape->getCount()); } -UDTLayoutBase::UDTLayoutBase(const PDBSymbol &Symbol, const std::string &Name, - uint32_t Size) - : SymbolBase(Symbol), Name(Name), SizeOf(Size) { - UsedBytes.resize(Size); - ChildrenPerByte.resize(Size); - initializeChildren(Symbol); +UDTLayoutBase::UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym, + const std::string &Name, uint32_t OffsetInParent, + uint32_t Size, bool IsElided) + : LayoutItemBase(Parent, &Sym, Name, OffsetInParent, Size, IsElided) { + // UDT storage comes from a union of all the children's storage, so start out + // uninitialized. 
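With this change, the LayoutItemBase queries deepPaddingSize() and tailPadding() above reduce to plain bit-vector arithmetic over the bytes a layout item actually occupies. A small worked illustration with invented numbers: an 8-byte item whose children cover bytes 0 through 3 and byte 6.

    #include "llvm/ADT/BitVector.h"

    static unsigned paddingSketch() {
      llvm::BitVector Used(8);                               // one bit per byte
      Used.set(0, 4);                                        // bytes 0..3 used
      Used.set(6);                                           // byte 6 used
      unsigned Deep = Used.size() - Used.count();            // 8 - 5 = 3 padding bytes in total
      unsigned Tail = Used.size() - (Used.find_last() + 1);  // 8 - (6 + 1) = 1 trailing byte
      return Deep + Tail;
    }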
+ UsedBytes.reset(0, Size); + + initializeChildren(Sym); + if (LayoutSize < Size) + UsedBytes.resize(LayoutSize); +} + +uint32_t UDTLayoutBase::tailPadding() const { + uint32_t Abs = LayoutItemBase::tailPadding(); + if (!LayoutItems.empty()) { + const LayoutItemBase *Back = LayoutItems.back(); + uint32_t ChildPadding = Back->LayoutItemBase::tailPadding(); + if (Abs < ChildPadding) + Abs = 0; + else + Abs -= ChildPadding; + } + return Abs; } ClassLayout::ClassLayout(const PDBSymbolTypeUDT &UDT) - : UDTLayoutBase(UDT, UDT.getName(), UDT.getLength()), UDT(UDT) {} + : UDTLayoutBase(nullptr, UDT, UDT.getName(), 0, UDT.getLength(), false), + UDT(UDT) { + ImmediateUsedBytes.resize(SizeOf, false); + for (auto &LI : LayoutItems) { + uint32_t Begin = LI->getOffsetInParent(); + uint32_t End = Begin + LI->getLayoutSize(); + End = std::min(SizeOf, End); + ImmediateUsedBytes.set(Begin, End); + } +} ClassLayout::ClassLayout(std::unique_ptr UDT) : ClassLayout(*UDT) { OwnedStorage = std::move(UDT); } +uint32_t ClassLayout::immediatePadding() const { + return SizeOf - ImmediateUsedBytes.count(); +} + BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent, - std::unique_ptr Base) - : UDTLayoutBase(*Base, Base->getName(), Base->getLength()), - StorageItemBase(Parent, *Base, Base->getName(), Base->getOffset(), - Base->getLength()), - Base(std::move(Base)) { - IsVirtualBase = this->Base->isVirtualBaseClass(); -} - -uint32_t UDTLayoutBase::shallowPaddingSize() const { - return UsedBytes.size() - UsedBytes.count(); -} - -uint32_t UDTLayoutBase::deepPaddingSize() const { - uint32_t Result = shallowPaddingSize(); - for (auto &Child : ChildStorage) - Result += Child->deepPaddingSize(); - return Result; + uint32_t OffsetInParent, bool Elide, + std::unique_ptr B) + : UDTLayoutBase(&Parent, *B, B->getName(), OffsetInParent, B->getLength(), + Elide), + Base(std::move(B)) { + if (isEmptyBase()) { + // Special case an empty base so that it doesn't get treated as padding. + UsedBytes.resize(1); + UsedBytes.set(0); + } + IsVirtualBase = Base->isVirtualBaseClass(); } void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { @@ -138,15 +163,16 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { // followed by functions, followed by other. This ordering is necessary // so that bases and vtables get initialized before any functions which // may override them. - UniquePtrVector Bases; UniquePtrVector VTables; UniquePtrVector Members; + UniquePtrVector VirtualBaseSyms; + auto Children = Sym.findAllChildren(); while (auto Child = Children->getNext()) { if (auto Base = unique_dyn_cast(Child)) { if (Base->isVirtualBaseClass()) - VirtualBases.push_back(std::move(Base)); + VirtualBaseSyms.push_back(std::move(Base)); else Bases.push_back(std::move(Base)); } @@ -164,20 +190,33 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { Other.push_back(std::move(Child)); } - for (auto &Base : Bases) { - auto BL = llvm::make_unique(*this, std::move(Base)); - BaseClasses.push_back(BL.get()); + // We don't want to have any re-allocations in the list of bases, so make + // sure to reserve enough space so that our ArrayRefs don't get invalidated. + AllBases.reserve(Bases.size() + VirtualBaseSyms.size()); + // Only add non-virtual bases to the class first. Only at the end of the + // class, after all non-virtual bases and data members have been added do we + // add virtual bases. This way the offsets are correctly aligned when we go + // to lay out virtual bases. 
+ for (auto &Base : Bases) { + uint32_t Offset = Base->getOffset(); + // Non-virtual bases never get elided. + auto BL = llvm::make_unique(*this, Offset, false, + std::move(Base)); + + AllBases.push_back(BL.get()); addChildToLayout(std::move(BL)); } + NonVirtualBases = AllBases; - for (auto &VT : VTables) { - auto VTLayout = llvm::make_unique(*this, std::move(VT)); + assert(VTables.size() <= 1); + if (!VTables.empty()) { + auto VTLayout = + llvm::make_unique(*this, std::move(VTables[0])); VTable = VTLayout.get(); addChildToLayout(std::move(VTLayout)); - continue; } for (auto &Data : Members) { @@ -186,150 +225,74 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { addChildToLayout(std::move(DM)); } - for (auto &Func : Funcs) { - if (!Func->isVirtual()) - continue; - - if (Func->isIntroVirtualFunction()) - addVirtualIntro(*Func); - else - addVirtualOverride(*Func); - } -} - -void UDTLayoutBase::addVirtualIntro(PDBSymbolFunc &Func) { - // Kind of a hack, but we prefer the more common destructor name that people - // are familiar with, e.g. ~ClassName. It seems there are always both and - // the vector deleting destructor overwrites the nice destructor, so just - // ignore the vector deleting destructor. - if (Func.getName() == "__vecDelDtor") - return; - - if (!VTable) { - // FIXME: Handle this. What's most likely happening is we have an intro - // virtual in a derived class where the base also has an intro virtual. - // In this case the vtable lives in the base. What we really need is - // for each UDTLayoutBase to contain a list of all its vtables, and - // then propagate this list up the hierarchy so that derived classes have - // direct access to their bases' vtables. - return; - } - - uint32_t Stride = VTable->getElementSize(); - - uint32_t Index = Func.getVirtualBaseOffset(); - assert(Index % Stride == 0); - Index /= Stride; - - VTable->setFunction(Index, Func); -} - -VTableLayoutItem *UDTLayoutBase::findVTableAtOffset(uint32_t RelativeOffset) { - if (VTable && VTable->getOffsetInParent() == RelativeOffset) - return VTable; - for (auto Base : BaseClasses) { - uint32_t Begin = Base->getOffsetInParent(); - uint32_t End = Begin + Base->getSize(); - if (RelativeOffset < Begin || RelativeOffset >= End) - continue; - - return Base->findVTableAtOffset(RelativeOffset - Begin); - } - - return nullptr; -} - -void UDTLayoutBase::addVirtualOverride(PDBSymbolFunc &Func) { - auto Signature = Func.getSignature(); - auto ThisAdjust = Signature->getThisAdjust(); - // ThisAdjust tells us which VTable we're looking for. Specifically, it's - // the offset into the current class of the VTable we're looking for. So - // look through the base hierarchy until we find one such that - // AbsoluteOffset(VT) == ThisAdjust - VTableLayoutItem *VT = findVTableAtOffset(ThisAdjust); - if (!VT) { - // FIXME: There really should be a vtable here. If there's not it probably - // means that the vtable is in a virtual base, which we don't yet support. - assert(!VirtualBases.empty()); - return; - } - int32_t OverrideIndex = -1; - // Now we've found the VTable. Func will not have a virtual base offset set, - // so instead we need to compare names and signatures. We iterate each item - // in the VTable. All items should already have non null entries because they - // were initialized by the intro virtual, which was guaranteed to come before. - for (auto ItemAndIndex : enumerate(VT->funcs())) { - auto Item = ItemAndIndex.value(); - assert(Item); - // If the name doesn't match, this isn't an override. 
Note that it's ok - // for the return type to not match (e.g. co-variant return). - if (Item->getName() != Func.getName()) { - if (Item->isDestructor() && Func.isDestructor()) { - OverrideIndex = ItemAndIndex.index(); - break; - } - continue; - } - // Now make sure it's the right overload. Get the signature of the existing - // vtable method and make sure it has the same arglist and the same cv-ness. - auto ExistingSig = Item->getSignature(); - if (ExistingSig->isConstType() != Signature->isConstType()) - continue; - if (ExistingSig->isVolatileType() != Signature->isVolatileType()) - continue; - - // Now compare arguments. Using the raw bytes of the PDB this would be - // trivial - // because there is an ArgListId and they should be identical. But DIA - // doesn't - // expose this, so the best we can do is iterate each argument and confirm - // that - // each one is identical. - if (ExistingSig->getCount() != Signature->getCount()) - continue; - bool IsMatch = true; - auto ExistingEnumerator = ExistingSig->getArguments(); - auto NewEnumerator = Signature->getArguments(); - for (uint32_t I = 0; I < ExistingEnumerator->getChildCount(); ++I) { - auto ExistingArg = ExistingEnumerator->getNext(); - auto NewArg = NewEnumerator->getNext(); - if (ExistingArg->getSymIndexId() != NewArg->getSymIndexId()) { - IsMatch = false; - break; + // Make sure add virtual bases before adding functions, since functions may be + // overrides of virtual functions declared in a virtual base, so the VTables + // and virtual intros need to be correctly initialized. + for (auto &VB : VirtualBaseSyms) { + int VBPO = VB->getVirtualBasePointerOffset(); + if (!hasVBPtrAtOffset(VBPO)) { + if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) { + auto VBPL = llvm::make_unique(*this, std::move(VBP), + VBPO, VBP->getLength()); + VBPtr = VBPL.get(); + addChildToLayout(std::move(VBPL)); } } - if (!IsMatch) - continue; - // It's a match! Stick the new function into the VTable. - OverrideIndex = ItemAndIndex.index(); - break; + // Virtual bases always go at the end. So just look for the last place we + // ended when writing something, and put our virtual base there. + // Note that virtual bases get elided unless this is a top-most derived + // class. + uint32_t Offset = UsedBytes.find_last() + 1; + bool Elide = (Parent != nullptr); + auto BL = + llvm::make_unique(*this, Offset, Elide, std::move(VB)); + AllBases.push_back(BL.get()); + + // Only lay this virtual base out directly inside of *this* class if this + // is a top-most derived class. Keep track of it regardless, but only + // physically lay it out if it's a topmost derived class. + addChildToLayout(std::move(BL)); } - if (OverrideIndex == -1) { - // FIXME: This is probably due to one of the other FIXMEs in this file. 
- return; - } - VT->setFunction(OverrideIndex, Func); + VirtualBases = makeArrayRef(AllBases).drop_front(NonVirtualBases.size()); + + if (Parent != nullptr) + LayoutSize = UsedBytes.find_last() + 1; } -void UDTLayoutBase::addChildToLayout(std::unique_ptr Child) { +bool UDTLayoutBase::hasVBPtrAtOffset(uint32_t Off) const { + if (VBPtr && VBPtr->getOffsetInParent() == Off) + return true; + for (BaseClassLayout *BL : AllBases) { + if (BL->hasVBPtrAtOffset(Off - BL->getOffsetInParent())) + return true; + } + return false; +} + +void UDTLayoutBase::addChildToLayout(std::unique_ptr Child) { uint32_t Begin = Child->getOffsetInParent(); - uint32_t End = Begin + Child->getSize(); - // Due to the empty base optimization, End might point outside the bounds of - // the parent class. If that happens, just clamp the value. - End = std::min(End, getClassSize()); - UsedBytes.set(Begin, End); - while (Begin != End) { - ChildrenPerByte[Begin].push_back(Child.get()); - ++Begin; + if (!Child->isElided()) { + BitVector ChildBytes = Child->usedBytes(); + + // Suppose the child occupies 4 bytes starting at offset 12 in a 32 byte + // class. When we call ChildBytes.resize(32), the Child's storage will + // still begin at offset 0, so we need to shift it left by offset bytes + // to get it into the right position. + ChildBytes.resize(UsedBytes.size()); + ChildBytes <<= Child->getOffsetInParent(); + UsedBytes |= ChildBytes; + + if (ChildBytes.count() > 0) { + auto Loc = std::upper_bound(LayoutItems.begin(), LayoutItems.end(), Begin, + [](uint32_t Off, const LayoutItemBase *Item) { + return (Off < Item->getOffsetInParent()); + }); + + LayoutItems.insert(Loc, Child.get()); + } } - auto Loc = std::upper_bound( - ChildStorage.begin(), ChildStorage.end(), Begin, - [](uint32_t Off, const std::unique_ptr &Item) { - return Off < Item->getOffsetInParent(); - }); - - ChildStorage.insert(Loc, std::move(Child)); + ChildStorage.push_back(std::move(Child)); } \ No newline at end of file diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index d0b77e7218b9..b7de07170de9 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -1103,35 +1103,34 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantFP *CFP = dyn_cast(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) { + const APFloat &APF = CFP->getValueAPF(); + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // bool ignored; - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble(); - bool isInf = CFP->getValueAPF().isInfinity(); - bool isNaN = CFP->getValueAPF().isNaN(); + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); if (!isInf && !isNaN) { - double Val = isDouble ? CFP->getValueAPF().convertToDouble() : - CFP->getValueAPF().convertToFloat(); + double Val = isDouble ? 
APF.convertToDouble() : APF.convertToFloat(); SmallString<128> StrVal; - raw_svector_ostream(StrVal) << Val; - + APF.toString(StrVal, 6, 0, false); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check // that the string matches the "[-+]?[0-9]" regex. // - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) { - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } + assert(((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; } } // Otherwise we could not reparse it to exactly the same value, so we must @@ -1140,7 +1139,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - APFloat apf = CFP->getValueAPF(); + APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, @@ -1153,27 +1152,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - APInt API = CFP->getValueAPF().bitcastToAPInt(); - if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) { + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { Out << 'K'; Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, /*Upper=*/true); Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) { + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { Out << 'L'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) { + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { Out << 'M'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) { + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { Out << 'H'; Out << format_hex_no_prefix(API.getZExtValue(), 4, /*Upper=*/true); diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h index 09f037365793..cf2925254695 100644 --- a/contrib/llvm/lib/IR/AttributeImpl.h +++ b/contrib/llvm/lib/IR/AttributeImpl.h @@ -255,17 +255,10 @@ class AttributeListImpl final /// \brief Retrieve the attribute set node for the given "slot" in the /// AttrNode list. - AttributeSet getSlotNode(unsigned Slot) const { + AttributeSet getSlotAttributes(unsigned Slot) const { return getSlotPair(Slot)->second; } - /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. 
- /// \p Slot is an index into the AttrNodes list, not the index of the return / - /// parameter/ function which the attributes apply to. - AttributeList getSlotAttributes(unsigned Slot) const { - return AttributeList::get(Context, *getSlotPair(Slot)); - } - /// \brief Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { @@ -273,8 +266,10 @@ class AttributeListImpl final } typedef AttributeSet::iterator iterator; - iterator begin(unsigned Slot) const { return getSlotNode(Slot).begin(); } - iterator end(unsigned Slot) const { return getSlotNode(Slot).end(); } + iterator begin(unsigned Slot) const { + return getSlotAttributes(Slot).begin(); + } + iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); } void Profile(FoldingSetNodeID &ID) const; static void Profile(FoldingSetNodeID &ID, diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index d690111ef210..e30414537a6c 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -955,13 +955,13 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, for (unsigned Index : Indices) { // Add all attribute slots before the current index. for (; I < E && getSlotIndex(I) < Index; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); // Add the attribute at this index. If we already have attributes at this // index, merge them into a new set. AttrBuilder B; if (I < E && getSlotIndex(I) == Index) { - B.merge(AttrBuilder(pImpl->getSlotNode(I))); + B.merge(AttrBuilder(pImpl->getSlotAttributes(I))); ++I; } B.addAttribute(A); @@ -970,7 +970,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, // Add remaining attributes. for (; I < E; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); return get(C, AttrVec); } @@ -1008,13 +1008,13 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, for (I = 0; I < NumAttrs; ++I) { if (getSlotIndex(I) >= Index) break; - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); } AttrBuilder NewAttrs; if (I < NumAttrs && getSlotIndex(I) == Index) { // We need to merge the attribute sets. - NewAttrs.merge(pImpl->getSlotNode(I)); + NewAttrs.merge(pImpl->getSlotAttributes(I)); ++I; } NewAttrs.merge(B); @@ -1024,7 +1024,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, // Add the remaining entries. for (; I < NumAttrs; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); + AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); return get(C, AttrVec); } @@ -1063,11 +1063,11 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, for (unsigned I = 0, E = NumAttrs; I != E; ++I) { if (getSlotIndex(I) >= Index) { if (getSlotIndex(I) == Index) - B = AttrBuilder(pImpl->getSlotNode(LastIndex++)); + B = AttrBuilder(getSlotAttributes(LastIndex++)); break; } LastIndex = I + 1; - AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); + AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); } // Remove the attributes from the existing set and add them. 
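The Attributes hunks above consistently replace pImpl->getSlotNode(I) with getSlotAttributes(I), which now hands back an AttributeSet rather than an AttributeList. A minimal sketch of the post-rename slot walk, assuming an AttributeList AL with the usual getNumSlots/getSlotIndex accessors; the loop itself is illustrative, not from the patch.

    static void collectSlotsSketch(const AttributeList &AL) {
      std::vector<std::pair<unsigned, AttributeSet>> Slots;
      for (unsigned I = 0, E = AL.getNumSlots(); I != E; ++I)
        Slots.emplace_back(AL.getSlotIndex(I),       // return/param/function index
                           AL.getSlotAttributes(I)); // now an AttributeSet, not an AttributeList
      (void)Slots;
    }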
@@ -1077,7 +1077,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); + AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); return get(C, AttrSets); } @@ -1091,7 +1091,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { unsigned Index = getSlotIndex(I); if (Index != WithoutIndex) - AttrSet.push_back({Index, pImpl->getSlotNode(I)}); + AttrSet.push_back({Index, pImpl->getSlotAttributes(I)}); } return get(C, AttrSet); } @@ -1220,7 +1220,7 @@ AttributeSet AttributeList::getAttributes(unsigned Index) const { // Loop through to find the attribute node we want. for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) if (pImpl->getSlotIndex(I) == Index) - return pImpl->getSlotNode(I); + return pImpl->getSlotAttributes(I); return AttributeSet(); } @@ -1251,7 +1251,7 @@ unsigned AttributeList::getSlotIndex(unsigned Slot) const { return pImpl->getSlotIndex(Slot); } -AttributeList AttributeList::getSlotAttributes(unsigned Slot) const { +AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const { assert(pImpl && Slot < pImpl->getNumSlots() && "Slot # out of range!"); return pImpl->getSlotAttributes(Slot); diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp index ba92a91cc917..d4b455228225 100644 --- a/contrib/llvm/lib/IR/GCOV.cpp +++ b/contrib/llvm/lib/IR/GCOV.cpp @@ -589,8 +589,12 @@ FileInfo::openCoveragePath(StringRef CoveragePath) { /// print - Print source files with collected line count information. void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, StringRef GCNOFile, StringRef GCDAFile) { - for (const auto &LI : LineInfo) { - StringRef Filename = LI.first(); + SmallVector Filenames; + for (const auto &LI : LineInfo) + Filenames.push_back(LI.first()); + std::sort(Filenames.begin(), Filenames.end()); + + for (StringRef Filename : Filenames) { auto AllLines = LineConsumer(Filename); std::string CoveragePath = getCoveragePath(Filename, MainFilename); @@ -603,7 +607,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, CovOS << " -: 0:Runs:" << RunCount << "\n"; CovOS << " -: 0:Programs:" << ProgramCount << "\n"; - const LineData &Line = LI.second; + const LineData &Line = LineInfo[Filename]; GCOVCoverage FileCoverage(Filename); for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) { diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp index b07c57685a26..d83bdf2acd43 100644 --- a/contrib/llvm/lib/IR/Value.cpp +++ b/contrib/llvm/lib/IR/Value.cpp @@ -432,6 +432,7 @@ namespace { enum PointerStripKind { PSK_ZeroIndices, PSK_ZeroIndicesAndAliases, + PSK_ZeroIndicesAndAliasesAndBarriers, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -450,6 +451,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { if (auto *GEP = dyn_cast(V)) { switch (StripKind) { case PSK_ZeroIndicesAndAliases: + case PSK_ZeroIndicesAndAliasesAndBarriers: case PSK_ZeroIndices: if (!GEP->hasAllZeroIndices()) return V; @@ -472,12 +474,20 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { return V; V = GA->getAliasee(); } else { - if (auto CS = ImmutableCallSite(V)) + if (auto CS = ImmutableCallSite(V)) { if (const Value *RV = CS.getReturnedArgOperand()) { V = RV; continue; } - + // The result of 
invariant.group.barrier must alias it's argument, + // but it can't be marked with returned attribute, that's why it needs + // special case. + if (StripKind == PSK_ZeroIndicesAndAliasesAndBarriers && + CS.getIntrinsicID() == Intrinsic::invariant_group_barrier) { + V = CS.getArgOperand(0); + continue; + } + } return V; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); @@ -499,6 +509,11 @@ const Value *Value::stripInBoundsConstantOffsets() const { return stripPointerCastsAndOffsets(this); } +const Value *Value::stripPointerCastsAndBarriers() const { + return stripPointerCastsAndOffsets( + this); +} + const Value * Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const { diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index 9782c898bf50..1bc0d7361d4c 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -415,7 +415,8 @@ void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, // Flag as visible outside of ThinLTO if visible from a regular object or // if this is a reference in the regular LTO partition. GlobalRes.VisibleOutsideThinLTO |= - (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO)); + (Res.VisibleToRegularObj || Sym.isUsed() || + Partition == GlobalResolution::RegularLTO); } static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp index 4bd251f727a4..30447c528af1 100644 --- a/contrib/llvm/lib/LTO/LTOBackend.cpp +++ b/contrib/llvm/lib/LTO/LTOBackend.cpp @@ -25,7 +25,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTO.h" -#include "llvm/LTO/legacy/UpdateCompilerUsed.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" @@ -353,19 +352,6 @@ finalizeOptimizationRemarks(std::unique_ptr DiagOutputFile) { DiagOutputFile->os().flush(); } -static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) { - // Collect the list of undefined symbols used in asm and update - // llvm.compiler.used to prevent optimization to drop these from the output. - StringSet<> AsmUndefinedRefs; - ModuleSymbolTable::CollectAsmSymbols( - Mod, - [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { - if (Flags & object::BasicSymbolRef::SF_Undefined) - AsmUndefinedRefs.insert(Name); - }); - updateCompilerUsed(Mod, TM, AsmUndefinedRefs); -} - Error lto::backend(Config &C, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, std::unique_ptr Mod, @@ -377,8 +363,6 @@ Error lto::backend(Config &C, AddStreamFn AddStream, std::unique_ptr TM = createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); - handleAsmUndefinedRefs(*Mod, *TM); - // Setup optimization remarks. 
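The Value.cpp hunk earlier in this patch adds a strip kind that also looks through llvm.invariant.group.barrier calls, since the intrinsic's result must alias its argument but cannot carry the returned attribute. A minimal sketch of the new query, with Ptr standing in for any pointer-typed Value a caller already holds.

    static const Value *underlyingObjectSketch(const Value *Ptr) {
      // Peels off bitcasts, all-zero GEPs, aliases, and
      // llvm.invariant.group.barrier calls to reach the underlying pointer.
      return Ptr->stripPointerCastsAndBarriers();
    }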
auto DiagFileOrErr = lto::setupOptimizationRemarks( Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness); @@ -416,8 +400,6 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, std::unique_ptr TM = createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); - handleAsmUndefinedRefs(Mod, *TM); - if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); return Error::success(); diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index 42e8ad340281..2fa9c03b608e 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -134,7 +134,7 @@ struct ParseStatementInfo { SmallVectorImpl *AsmRewrites = nullptr; - ParseStatementInfo() = default; + ParseStatementInfo() = delete; ParseStatementInfo(SmallVectorImpl *rewrites) : AsmRewrites(rewrites) {} }; @@ -737,6 +737,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { HadError = false; AsmCond StartingCondState = TheCondState; + SmallVector AsmStrRewrites; // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. @@ -756,7 +757,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { - ParseStatementInfo Info; + ParseStatementInfo Info(&AsmStrRewrites); if (!parseStatement(Info, nullptr)) continue; @@ -1650,7 +1651,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } // Emit the label. - if (!ParsingInlineAsm) + if (!getTargetParser().isParsingInlineAsm()) Out.EmitLabel(Sym, IDLoc); // If we are generating dwarf for assembly source files then gather the @@ -2057,9 +2058,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // If parsing succeeded, match the instruction. 
if (!ParseHadError) { uint64_t ErrorInfo; - if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode, - Info.ParsedOperands, Out, - ErrorInfo, ParsingInlineAsm)) + if (getTargetParser().MatchAndEmitInstruction( + IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, + getTargetParser().isParsingInlineAsm())) return true; } return false; diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp index 159cc3b4def2..6444046a30d7 100644 --- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp @@ -1105,7 +1105,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - encodeULEB128(CodeRelocations.size(), getStream()); + encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); WriteRelocations(CodeRelocations, getStream(), SymbolIndices); WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index 23682e1fabfd..e89a4a315c46 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -1,4 +1,4 @@ -//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===// +//===- ELF.cpp - ELF object file implementation ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,15 +8,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" -namespace llvm { -namespace object { +using namespace llvm; +using namespace object; #define ELF_RELOC(name, value) \ case ELF::name: \ return #name; \ -StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { switch (Machine) { case ELF::EM_X86_64: switch (Type) { @@ -139,6 +141,3 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { } #undef ELF_RELOC - -} // end namespace object -} // end namespace llvm diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp index 3f8c81c8e911..86f033bb6cbf 100644 --- a/contrib/llvm/lib/Object/ELFObjectFile.cpp +++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include +#include +#include +#include +#include +#include +#include -namespace llvm { +using namespace llvm; using namespace object; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) @@ -299,5 +314,3 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { TheTriple.setArchName(Triple); } - -} // end namespace llvm diff --git a/contrib/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm/lib/Object/IRSymtab.cpp index 
bb3d1b2cf695..5f0837882d60 100644 --- a/contrib/llvm/lib/Object/IRSymtab.cpp +++ b/contrib/llvm/lib/Object/IRSymtab.cpp @@ -1,4 +1,4 @@ -//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRSymtab.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/StringSaver.h" +#include +#include +#include +#include using namespace llvm; using namespace irsymtab; @@ -25,6 +45,7 @@ namespace { struct Builder { SmallVector &Symtab; SmallVector &Strtab; + Builder(SmallVector &Symtab, SmallVector &Strtab) : Symtab(Symtab), Strtab(Strtab) {} @@ -49,6 +70,7 @@ struct Builder { S.Offset = StrtabBuilder.add(Value); S.Size = Value.size(); } + template void writeRange(storage::Range &R, const std::vector &Objs) { R.Offset = Symtab.size(); @@ -141,6 +163,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, Sym.ComdatIndex = -1; auto *GV = Msym.dyn_cast(); if (!GV) { + // Undefined module asm symbols act as GC roots and are implicitly used. 
+ if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_used; setStr(Sym.IRName, ""); return Error::success(); } @@ -228,7 +253,7 @@ Error Builder::build(ArrayRef IRMods) { return Error::success(); } -} // anonymous namespace +} // end anonymous namespace Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, SmallVector &Strtab) { diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 1753d2baaedd..3d3fa07db3f4 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -1,4 +1,4 @@ -//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,32 +12,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include +#include "llvm/Support/SwapByteOrder.h" +#include +#include +#include +#include #include #include #include +#include +#include +#include using namespace llvm; using namespace object; namespace { + struct section_base { char sectname[16]; char segname[16]; }; -} + +} // end anonymous namespace static Error malformedError(Twine Msg) { @@ -1144,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, Error &Err, uint32_t UniversalCputype, uint32_t UniversalIndex) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), - DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), - HasPageZeroSegment(false) { + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { ErrorAsOutParameter ErrAsOutParam(&Err); uint64_t SizeOfHeaders; uint32_t cputype; @@ -2343,11 +2359,11 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: return "Mach-O 32-bit unknown"; @@ -2355,11 +2371,11 @@ StringRef MachOObjectFile::getFileFormatName() const { } switch (CPUType) { - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return "Mach-O arm64"; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: 
return "Mach-O 64-bit unknown"; @@ -2368,17 +2384,17 @@ StringRef MachOObjectFile::getFileFormatName() const { Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return Triple::aarch64; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; @@ -2571,8 +2587,7 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef T) - : Trie(T), Malformed(false), Done(false) {} +ExportEntry::ExportEntry(ArrayRef T) : Trie(T) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -2641,9 +2656,7 @@ uint32_t ExportEntry::nodeOffset() const { } ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) {} + : Start(Ptr), Current(Ptr) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -2733,7 +2746,7 @@ void ExportEntry::moveNext() { iterator_range MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Start(Trie); - if (Trie.size() == 0) + if (Trie.empty()) Start.moveToEnd(); else Start.moveToFirst(); @@ -2750,9 +2763,8 @@ iterator_range MachOObjectFile::exports() const { MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, ArrayRef Bytes, bool is64Bit) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 
8 : 4) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2794,7 +2806,7 @@ void MachORebaseEntry::moveNext() { More = false; Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; @@ -2807,8 +2819,8 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " - << "RebaseType=" << (int) RebaseType << "\n"); + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -2831,10 +2843,10 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -2855,9 +2867,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2881,9 +2893,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2913,11 +2925,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2954,11 +2966,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2992,11 +3004,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - 
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -3041,11 +3053,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: *E = malformedError("bad rebase info (bad opcode value 0x" + @@ -3131,10 +3143,8 @@ iterator_range MachOObjectFile::rebaseTable(Error &Err) { MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, ArrayRef Bytes, bool is64Bit, Kind BK) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0), - Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), - PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -3189,7 +3199,7 @@ void MachOBindEntry::moveNext() { } More = false; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: if (TableKind == Kind::Weak) { @@ -3211,8 +3221,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: if (TableKind == Kind::Weak) { @@ -3241,8 +3251,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: if (TableKind == Kind::Weak) { @@ -3267,8 +3277,8 @@ void MachOBindEntry::moveNext() { Ordinal = 0; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; @@ -3288,8 +3298,8 @@ void MachOBindEntry::moveNext() { ++Ptr; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " - << "SymbolName=" << SymbolName << "\n"); + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); if (TableKind == Kind::Weak) { if (ImmValue & 
MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) return; @@ -3306,8 +3316,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " - << "BindType=" << (int)BindType << "\n"); + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: Addend = readSLEB128(&error); @@ -3320,8 +3330,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " - << "Addend=" << Addend << "\n"); + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -3343,10 +3353,10 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -3366,9 +3376,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; @@ -3395,9 +3405,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: if (TableKind == Kind::Lazy) { @@ -3452,11 +3462,11 @@ void MachOBindEntry::moveNext() { RemainingLoopCount = 0; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: if (TableKind == Kind::Lazy) { @@ -3501,10 +3511,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: if (TableKind == Kind::Lazy) { @@ -3568,11 +3577,11 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << 
"\n"); return; default: *E = malformedError("bad bind info (bad opcode value 0x" + diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp index de1ddab88fd4..91f93a41032e 100644 --- a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==// +//==- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation -==// // // The LLVM Compiler Infrastructure // @@ -11,29 +11,38 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include + using namespace llvm; using namespace object; -static llvm::cl::opt IgnoreEmptyThinLTOIndexFile( - "ignore-empty-index-file", llvm::cl::ZeroOrMore, - llvm::cl::desc( +static cl::opt IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", cl::ZeroOrMore, + cl::desc( "Ignore an empty index file and perform non-ThinLTO compilation"), - llvm::cl::init(false)); + cl::init(false)); ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( MemoryBufferRef Object, std::unique_ptr I) : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { } -ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {} +ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() = default; std::unique_ptr ModuleSummaryIndexObjectFile::takeIndex() { return std::move(Index); diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp index 9a935d8e0869..a5b42725d817 100644 --- a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// // // The LLVM Compiler Infrastructure // @@ -13,27 +13,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include 
"llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + using namespace llvm; using namespace object; diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp index c9c27451f809..e94e9cfed394 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.cpp +++ b/contrib/llvm/lib/Object/RecordStreamer.cpp @@ -9,6 +9,7 @@ #include "RecordStreamer.h" #include "llvm/MC/MCSymbol.h" + using namespace llvm; void RecordStreamer::markDefined(const MCSymbol &Symbol) { @@ -69,14 +70,14 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + RecordStreamer::const_iterator RecordStreamer::begin() { return Symbols.begin(); } RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } -RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} - void RecordStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h index a845ecd786a8..4d119091a3d2 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.h +++ b/contrib/llvm/lib/Object/RecordStreamer.h @@ -1,4 +1,4 @@ -//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,16 @@ #ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H #define LLVM_LIB_OBJECT_RECORDSTREAMER_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +#include namespace llvm { + class RecordStreamer : public MCStreamer { public: enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, @@ -24,16 +31,19 @@ class RecordStreamer : public MCStreamer { // their symbol binding after parsing complete. This maps from each // aliasee to its list of aliases. DenseMap> SymverAliasMap; + void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); void markUsed(const MCSymbol &Symbol); void visitUsedSymbol(const MCSymbol &Sym) override; public: - typedef StringMap::const_iterator const_iterator; + RecordStreamer(MCContext &Context); + + using const_iterator = StringMap::const_iterator; + const_iterator begin(); const_iterator end(); - RecordStreamer(MCContext &Context); void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) override; void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; @@ -50,6 +60,7 @@ class RecordStreamer : public MCStreamer { DenseMap> &symverAliases() { return SymverAliasMap; } + /// Get the state recorded for the given symbol. 
State getSymbolState(const MCSymbol *Sym) { auto SI = Symbols.find(Sym->getName()); @@ -58,5 +69,7 @@ class RecordStreamer : public MCStreamer { return SI->second; } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp index fc1dca35424e..9f3486e58a11 100644 --- a/contrib/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" @@ -22,7 +23,9 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/Wasm.h" #include +#include #include +#include #include using namespace llvm; @@ -141,7 +144,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { Expr.Value.Float64 = readFloat64(Ptr); break; case wasm::WASM_OPCODE_GET_GLOBAL: - Expr.Value.Global = readUint32(Ptr); + Expr.Value.Global = readULEB128(Ptr); break; default: return make_error("Invalid opcode in init_expr", @@ -180,7 +183,7 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr, } WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) - : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) { + : ObjectFile(Binary::ID_Wasm, Buffer) { ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -252,7 +255,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { while (Count--) { /*uint32_t Index =*/readVaruint32(Ptr); StringRef Name = readString(Ptr); - if (Name.size()) + if (!Name.empty()) Symbols.emplace_back(Name, WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME); } @@ -313,11 +316,12 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: break; case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - Reloc.Addend = readVaruint32(Ptr); + Reloc.Addend = readVarint32(Ptr); break; default: return make_error("Bad relocation type", diff --git a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp index 3e1bed19d61f..9b1ff7e5dc16 100644 --- a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -223,7 +223,7 @@ void MappingTraits::mapping( IO.mapRequired("Type", Relocation.Type); IO.mapRequired("Index", Relocation.Index); IO.mapRequired("Offset", Relocation.Offset); - IO.mapRequired("Addend", Relocation.Addend); + IO.mapOptional("Addend", Relocation.Addend, 0); } void MappingTraits::mapping( @@ -294,6 +294,9 @@ void MappingTraits::mapping(IO &IO, case wasm::WASM_OPCODE_F64_CONST: IO.mapRequired("Value", Expr.Value.Float64); break; + case wasm::WASM_OPCODE_GET_GLOBAL: + IO.mapRequired("Index", Expr.Value.Global); + break; } } diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp index 0421946a32a6..55ac2541948e 100644 --- a/contrib/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm/lib/Passes/PassBuilder.cpp @@ -624,6 +624,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // And finally clean up 
LCSSA form before generating code. OptimizePM.addPass(InstSimplifierPass()); + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + OptimizePM.addPass(SimplifyCFGPass()); + // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index c4c892f0352a..e1e2c22e1df1 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -3393,7 +3393,7 @@ namespace { } void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, bool TruncateZero) const { switch (category) { case fcInfinity: if (isNegative()) @@ -3407,9 +3407,16 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, if (isNegative()) Str.push_back('-'); - if (!FormatMaxPadding) - append(Str, "0.0E+0"); - else + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else Str.push_back('0'); return; @@ -3543,12 +3550,16 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, Str.push_back(buffer[NDigits-1]); Str.push_back('.'); - if (NDigits == 1) + if (NDigits == 1 && TruncateZero) Str.push_back('0'); else for (unsigned I = 1; I != NDigits; ++I) Str.push_back(buffer[NDigits-1-I]); - Str.push_back('E'); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); Str.push_back(exp >= 0 ? '+' : '-'); if (exp < 0) exp = -exp; @@ -3557,6 +3568,9 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, expbuf.push_back((char) ('0' + (exp % 10))); exp /= 10; } while (exp); + // Exponent always at least two digits if we do not truncate zeros. 
+ if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); for (unsigned I = 0, E = expbuf.size(); I != E; ++I) Str.push_back(expbuf[E-1-I]); return; @@ -4362,10 +4376,11 @@ bool DoubleAPFloat::isInteger() const { void DoubleAPFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, + bool TruncateZero) const { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) - .toString(Str, FormatPrecision, FormatMaxPadding); + .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } bool DoubleAPFloat::getExactInverse(APFloat *inv) const { diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index 2d049a1cff85..1227d7528c8f 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -81,7 +81,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) { pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = -1ULL; + pVal[i] = WORD_MAX; clearUnusedBits(); } @@ -364,44 +364,20 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { return std::equal(pVal, pVal + getNumWords(), RHS.pVal); } -bool APInt::ult(const APInt& RHS) const { +int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL; + return VAL < RHS.VAL ? -1 : VAL > RHS.VAL; - // Get active bit length of both operands - unsigned n1 = getActiveBits(); - unsigned n2 = RHS.getActiveBits(); - - // If magnitude of LHS is less than RHS, return true. - if (n1 < n2) - return true; - - // If magnitude of RHS is greater than LHS, return false. - if (n2 < n1) - return false; - - // If they both fit in a word, just compare the low order word - if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) - return pVal[0] < RHS.pVal[0]; - - // Otherwise, compare all words - unsigned topWord = whichWord(std::max(n1,n2)-1); - for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) - return false; - if (pVal[i] < RHS.pVal[i]) - return true; - } - return false; + return tcCompare(pVal, RHS.pVal, getNumWords()); } -bool APInt::slt(const APInt& RHS) const { +int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { int64_t lhsSext = SignExtend64(VAL, BitWidth); int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); - return lhsSext < rhsSext; + return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } bool lhsNeg = isNegative(); @@ -409,11 +385,11 @@ bool APInt::slt(const APInt& RHS) const { // If the sign bits don't match, then (LHS < RHS) if LHS is negative if (lhsNeg != rhsNeg) - return lhsNeg; + return lhsNeg ? -1 : 1; // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return ult(RHS); + return tcCompare(pVal, RHS.pVal, getNumWords()); } void APInt::setBit(unsigned bitPosition) { @@ -428,13 +404,13 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { unsigned hiWord = whichWord(hiBit); // Create an initial mask for the low word with zeros below loBit. - uint64_t loMask = UINT64_MAX << whichBit(loBit); + uint64_t loMask = WORD_MAX << whichBit(loBit); // If hiBit is not aligned, we need a high mask. 
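// The ult()/slt() rewrite in this hunk funnels both predicates through a
// single three-way comparison backed by tcCompare.  A standalone sketch of
// that word-array comparison (compareWords is a hypothetical name; the real
// entry points are APInt::compare()/compareSigned(), which additionally
// handle the single-word and sign-bit cases shown above):
static int compareWords(const uint64_t *LHS, const uint64_t *RHS,
                        unsigned Words) {
  while (Words) {
    --Words;
    if (LHS[Words] != RHS[Words])           // first differing word decides
      return LHS[Words] > RHS[Words] ? 1 : -1;
  }
  return 0;                                 // all words equal
}
// Callers then map the result back onto predicates, e.g. ult(RHS) is
// equivalent to compare(RHS) < 0.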
unsigned hiShiftAmt = whichBit(hiBit); if (hiShiftAmt != 0) { // Create a high mask with zeros above hiBit. - uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); + uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); // If loWord and hiWord are equal, then we combine the masks. Otherwise, // set the bits in hiWord. if (hiWord == loWord) @@ -447,7 +423,7 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { // Fill any words between loWord and hiWord with all ones. for (unsigned word = loWord + 1; word < hiWord; ++word) - pVal[word] = UINT64_MAX; + pVal[word] = WORD_MAX; } /// Set the given bit to 0 whose position is given as "bitPosition". @@ -487,7 +463,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Single word result can be done as a direct bitmask. if (isSingleWord()) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); VAL &= ~(mask << bitPosition); VAL |= (subBits.VAL << bitPosition); return; @@ -499,7 +475,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Insertion within a single word can be done as a direct bitmask. if (loWord == hi1Word) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); pVal[loWord] &= ~(mask << loBit); pVal[loWord] |= (subBits.VAL << loBit); return; @@ -515,7 +491,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Mask+insert remaining bits. unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; if (remainingBits != 0) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); pVal[hi1Word] &= ~mask; pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); } @@ -682,7 +658,7 @@ unsigned APInt::countLeadingOnes() const { unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == -1ULL) + if (pVal[i] == WORD_MAX) Count += APINT_BITS_PER_WORD; else { Count += llvm::countLeadingOnes(pVal[i]); @@ -708,11 +684,12 @@ unsigned APInt::countTrailingZeros() const { unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) Count += llvm::countTrailingOnes(pVal[i]); - return std::min(Count, BitWidth); + assert(Count <= BitWidth); + return Count; } unsigned APInt::countPopulationSlowCase() const { @@ -962,43 +939,26 @@ APInt APInt::trunc(unsigned width) const { } // Sign extend to a new width. -APInt APInt::sext(unsigned width) const { - assert(width > BitWidth && "Invalid APInt SignExtend request"); +APInt APInt::sext(unsigned Width) const { + assert(Width > BitWidth && "Invalid APInt SignExtend request"); - if (width <= APINT_BITS_PER_WORD) { - uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); - val = (int64_t)val >> (width - BitWidth); - return APInt(width, val >> (APINT_BITS_PER_WORD - width)); - } + if (Width <= APINT_BITS_PER_WORD) + return APInt(Width, SignExtend64(VAL, BitWidth)); - APInt Result(getMemory(getNumWords(width)), width); + APInt Result(getMemory(getNumWords(Width)), Width); - // Copy full words. 
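// In the rewritten sext() of this hunk, the only non-trivial step is
// sign-extending the highest word that still holds payload bits before the
// sign-fill memset.  A minimal standalone sketch of that step
// (signExtendTopWord is a hypothetical name; the patch calls
// llvm::SignExtend64 directly):
static uint64_t signExtendTopWord(uint64_t Word, unsigned BitsUsed) {
  // BitsUsed = ((BitWidth - 1) % 64) + 1, i.e. 1..64 significant bits.
  if (BitsUsed == 64)
    return Word;
  return (uint64_t)((int64_t)(Word << (64 - BitsUsed)) >> (64 - BitsUsed));
}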
- unsigned i; - uint64_t word = 0; - for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { - word = getRawData()[i]; - Result.pVal[i] = word; - } + // Copy words. + std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); - // Read and sign-extend any partial word. - unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; - if (bits != 0) - word = (int64_t)getRawData()[i] << bits >> bits; - else - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); - - // Write remaining full words. - for (; i != width / APINT_BITS_PER_WORD; i++) { - Result.pVal[i] = word; - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); - } - - // Write any partial word. - bits = (0 - width) % APINT_BITS_PER_WORD; - if (bits != 0) - Result.pVal[i] = word << bits >> bits; + // Sign extend the last word since there may be unused bits in the input. + Result.pVal[getNumWords() - 1] = + SignExtend64(Result.pVal[getNumWords() - 1], + ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + // Fill with sign bits. + std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); + Result.clearUnusedBits(); return Result; } @@ -1012,12 +972,11 @@ APInt APInt::zext(unsigned width) const { APInt Result(getMemory(getNumWords(width)), width); // Copy words. - unsigned i; - for (i = 0; i != getNumWords(); i++) - Result.pVal[i] = getRawData()[i]; + std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Zero remaining words. - memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); + std::memset(Result.pVal + getNumWords(), 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); return Result; } @@ -1052,89 +1011,51 @@ APInt APInt::sextOrSelf(unsigned width) const { /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(const APInt &shiftAmt) const { - return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +void APInt::ashrInPlace(const APInt &shiftAmt) { + ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); } /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(unsigned shiftAmt) const { - assert(shiftAmt <= BitWidth && "Invalid shift amount"); - // Handle a degenerate case - if (shiftAmt == 0) - return *this; +void APInt::ashrSlowCase(unsigned ShiftAmt) { + // Don't bother performing a no-op shift. + if (!ShiftAmt) + return; - // Handle single word shifts with built-in ashr - if (isSingleWord()) { - if (shiftAmt == BitWidth) - return APInt(BitWidth, 0); // undefined - return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt); - } + // Save the original sign bit for later. + bool Negative = isNegative(); - // If all the bits were shifted out, the result is, technically, undefined. - // We return -1 if it was negative, 0 otherwise. We check this early to avoid - // issues in the algorithm below. - if (shiftAmt == BitWidth) { - if (isNegative()) - return APInt(BitWidth, -1ULL, true); - else - return APInt(BitWidth, 0); - } + // WordShift is the inter-part shift; BitShift is is intra-part shift. + unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; + unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; + unsigned WordsToMove = getNumWords() - WordShift; + if (WordsToMove != 0) { + // Sign extend the last word to fill in the unused bits. 
+ pVal[getNumWords() - 1] = SignExtend64( + pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); - // Compute some values needed by the following shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift - unsigned breakWord = getNumWords() - 1 - offset; // last word affected - unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? - if (bitsInWord == 0) - bitsInWord = APINT_BITS_PER_WORD; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + // Move the words containing significant bits. + for (unsigned i = 0; i != WordsToMove - 1; ++i) + pVal[i] = (pVal[i + WordShift] >> BitShift) | + (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - // Move the words containing significant bits - for (unsigned i = 0; i <= breakWord; ++i) - val[i] = pVal[i+offset]; // move whole word - - // Adjust the top significant word for sign bit fill, if negative - if (isNegative()) - if (bitsInWord < APINT_BITS_PER_WORD) - val[breakWord] |= ~0ULL << bitsInWord; // set high bits - } else { - // Shift the low order words - for (unsigned i = 0; i < breakWord; ++i) { - // This combines the shifted corresponding word with the low bits from - // the next word (shifted into this word's high bits). - val[i] = (pVal[i+offset] >> wordShift) | - (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); - } - - // Shift the break word. In this case there are no bits from the next word - // to include in this word. - val[breakWord] = pVal[breakWord+offset] >> wordShift; - - // Deal with sign extension in the break word, and possibly the word before - // it. - if (isNegative()) { - if (wordShift > bitsInWord) { - if (breakWord > 0) - val[breakWord-1] |= - ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); - val[breakWord] |= ~0ULL; - } else - val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + // Handle the last word which has no high bits to copy. + pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift; + // Sign extend one more time. + pVal[WordsToMove - 1] = + SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); } } - // Remaining words are 0 or -1, just assign them. - uint64_t fillValue = (isNegative() ? -1ULL : 0); - for (unsigned i = breakWord+1; i < getNumWords(); ++i) - val[i] = fillValue; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; + // Fill in the remainder based on the original sign. + std::memset(pVal + WordsToMove, Negative ? -1 : 0, + WordShift * APINT_WORD_SIZE); + clearUnusedBits(); } /// Logical right-shift this APInt by shiftAmt. @@ -2608,7 +2529,7 @@ void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { if (!Count) return; - /* WordShift is the inter-part shift; BitShift is is intra-part shift. */ + // WordShift is the inter-part shift; BitShift is the intra-part shift. unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); unsigned BitShift = Count % APINT_BITS_PER_WORD; @@ -2635,7 +2556,7 @@ void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { if (!Count) return; - // WordShift is the inter-part shift; BitShift is is intra-part shift. + // WordShift is the inter-part shift; BitShift is the intra-part shift. 
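// Both the in-place ashrSlowCase() above and tcShiftLeft/tcShiftRight split a
// shift into a whole-word move (WordShift) plus an intra-word bit shift
// (BitShift).  Standalone sketch of the logical, zero-filling variant for a
// little-endian array of 64-bit words (lshrWords is a hypothetical name;
// ashrSlowCase additionally fills the vacated words with the sign):
static void lshrWords(uint64_t *Val, unsigned Words, unsigned Count) {
  unsigned WordShift = Count / 64;  // whole words dropped off the bottom
  unsigned BitShift = Count % 64;   // remaining bits to shift within a word
  for (unsigned I = 0; I != Words; ++I) {
    uint64_t Lo = I + WordShift < Words ? Val[I + WordShift] : 0;
    uint64_t Hi = I + WordShift + 1 < Words ? Val[I + WordShift + 1] : 0;
    Val[I] = BitShift ? (Lo >> BitShift) | (Hi << (64 - BitShift)) : Lo;
  }
}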
unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); unsigned BitShift = Count % APINT_BITS_PER_WORD; @@ -2684,10 +2605,8 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs, unsigned parts) { while (parts) { parts--; - if (lhs[parts] == rhs[parts]) - continue; - - return (lhs[parts] > rhs[parts]) ? 1 : -1; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 1 : -1; } return 0; diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp index 92ce6185306a..22fb3f2cb9c9 100644 --- a/contrib/llvm/lib/Support/DynamicLibrary.cpp +++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp @@ -193,4 +193,3 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName) { void LLVMAddSymbol(const char *symbolName, void *symbolValue) { return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue); } - diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index 64d5977e2ebd..f3a654d7d2bd 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -161,6 +161,7 @@ StringRef Triple::getVendorTypeName(VendorType Kind) { case Myriad: return "myriad"; case AMD: return "amd"; case Mesa: return "mesa"; + case SUSE: return "suse"; } llvm_unreachable("Invalid VendorType!"); @@ -443,6 +444,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("myriad", Triple::Myriad) .Case("amd", Triple::AMD) .Case("mesa", Triple::Mesa) + .Case("suse", Triple::SUSE) .Default(Triple::UnknownVendor); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index d0c0956b87ca..629ad5c61b78 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -942,6 +942,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, AArch64::XZR, NextMBBI); case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + } return false; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 550174b22a89..dc916c034661 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1125,7 +1125,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) BasePointerReg = RegInfo->getBaseRegister(); - bool ExtraCSSpill = false; + unsigned ExtraCSSpill = 0; const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { @@ -1153,7 +1153,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && !RegInfo->isReservedReg(MF, PairedReg)) - ExtraCSSpill = true; + ExtraCSSpill = PairedReg; } } @@ -1186,8 +1186,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. 
- if (BigStack && !ExtraCSSpill) { - if (UnspilledCSGPR != AArch64::NoRegister) { + if (BigStack) { + if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); @@ -1196,15 +1196,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = true; + ExtraCSSpill = UnspilledCSGPRPaired; NumRegsSpilled = SavedRegs.count(); } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. - if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false); + if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass &RC = AArch64::GPR64RegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + int FI = MFI.CreateStackObject(Size, Align, false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ddc95199d4c..a7c98fbb425f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -91,6 +91,7 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); +STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, @@ -105,6 +106,12 @@ cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +static cl::opt +EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -787,6 +794,140 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, + const APInt &Demanded, + TargetLowering::TargetLoweringOpt &TLO, + unsigned NewOpc) { + uint64_t OldImm = Imm, NewImm, Enc; + uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; + + // Return if the immediate is already all zeros, all ones, a bimm32 or a + // bimm64. + if (Imm == 0 || Imm == Mask || + AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) + return false; + + unsigned EltSize = Size; + uint64_t DemandedBits = Demanded.getZExtValue(); + + // Clear bits that are not demanded. + Imm &= DemandedBits; + + while (true) { + // The goal here is to set the non-demanded bits in a way that minimizes + // the number of switching between 0 and 1. In order to achieve this goal, + // we set the non-demanded bits to the value of the preceding demanded bits. + // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a + // non-demanded bit), we copy bit0 (1) to the least significant 'x', + // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. 
+ // The final result is 0b11000011. + uint64_t NonDemandedBits = ~DemandedBits; + uint64_t InvertedImm = ~Imm & DemandedBits; + uint64_t RotatedImm = + ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & + NonDemandedBits; + uint64_t Sum = RotatedImm + NonDemandedBits; + bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); + uint64_t Ones = (Sum + Carry) & NonDemandedBits; + NewImm = (Imm | Ones) & Mask; + + // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate + // or all-ones or all-zeros, in which case we can stop searching. Otherwise, + // we halve the element size and continue the search. + if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) + break; + + // We cannot shrink the element size any further if it is 2-bits. + if (EltSize == 2) + return false; + + EltSize /= 2; + Mask >>= EltSize; + uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; + + // Return if there is mismatch in any of the demanded bits of Imm and Hi. + if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) + return false; + + // Merge the upper and lower halves of Imm and DemandedBits. + Imm |= Hi; + DemandedBits |= DemandedBitsHi; + } + + ++NumOptimizedImms; + + // Replicate the element across the register width. + while (EltSize < Size) { + NewImm |= NewImm << EltSize; + EltSize *= 2; + } + + (void)OldImm; + assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && + "demanded bits should never be altered"); + assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); + + // Create the new constant immediate node. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // If the new constant immediate is all-zeros or all-ones, let the target + // independent DAG combine optimize this node. + if (NewImm == 0 || NewImm == OrigMask) + return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); + + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + SDValue New( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + + return TLO.CombineTo(Op, New); +} + +bool AArch64TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { + // Delay this optimization to as late as possible. + if (!TLO.LegalOps) + return false; + + if (!EnableOptimizeLogicalImm) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "i32 or i64 is expected after legalization."); + + // Exit early if we demand all bits. + if (Demanded.countPopulation() == Size) + return false; + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; + break; + case ISD::OR: + NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; + break; + case ISD::XOR: + NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; + break; + } + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return false; + uint64_t Imm = C->getZExtValue(); + return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); +} + /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
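The optimizeLogicalImm() hunk above fills every non-demanded bit with the value of the nearest lower demanded bit, so the candidate immediate has as few 0/1 transitions as possible before the bitmask-immediate encoding is retried. The snippet below is a self-contained sketch of just that filling step, reproducing the 0bx10xx0x1 example from the in-code comment; the helper name and the check in main() are illustrative and not part of the patch.

#include <cassert>
#include <cstdint>

// Fill each non-demanded bit of Imm from the preceding demanded bit, using
// the same arithmetic as the hunk above, then mask to EltSize bits.
static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t DemandedBits,
                                    unsigned EltSize) {
  uint64_t Mask = ~0ULL >> (64 - EltSize);
  Imm &= DemandedBits;
  uint64_t NonDemandedBits = ~DemandedBits;
  uint64_t InvertedImm = ~Imm & DemandedBits;
  uint64_t RotatedImm =
      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
      NonDemandedBits;
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  return (Imm | Ones) & Mask;
}

int main() {
  // 0bx10xx0x1: bits 6,5,2,0 are demanded (with values 1,0,0,1), the rest 'x'.
  uint64_t Demanded = 0x65; // 0b01100101
  uint64_t Imm = 0x41;      // 0b01000001
  assert(fillNonDemandedBits(Imm, Demanded, 8) == 0xC3); // 0b11000011
  return 0;
}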
@@ -3418,11 +3559,75 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Other Lowering Code //===----------------------------------------------------------------------===// +SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +// (loadGOT sym) +template +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes instead of using a wrapper node. + return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); +} + +// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) +template +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) + const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); +} + +// (addlow (adrp %hi(sym)) %lo(sym)) +template +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Lo = getTargetNode(N, Ty, DAG, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); +} + SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); - const GlobalAddressSDNode *GN = cast(Op); + GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3430,32 +3635,15 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, assert(cast(Op)->getOffset() == 0 && "unexpected offset in global node"); - // This also catched the large code model case for Darwin. + // This also catches the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. 
- return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(GN, DAG); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(GN, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. - SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | AArch64II::MO_PAGE); - unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(GN, DAG); } } @@ -4232,90 +4420,37 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(JT, DAG); } - - SDValue Hi = - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(JT, DAG); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. 
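// For reference, the three templated helpers introduced above reduce the
// repeated lowering code to one pattern per code model.  Roughly the
// resulting instruction shapes (a sketch, not an exact listing for every
// node type or object format):
//   getAddr(N):       adrp xN, sym              // small code model
//                     add  xN, xN, :lo12:sym
//   getGOT(N):        adrp xN, :got:sym
//                     ldr  xN, [xN, :got_lo12:sym]
//   getAddrLarge(N):  a MOVZ/MOVK sequence over the four 16-bit chunks
//                     (MO_G3 .. MO_G0) wrapped in AArch64ISD::WrapperLarge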
if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_GOT); - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(CP, DAG); } - - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(CP, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(CP, DAG); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); + BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(BA, DAG); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(BA, DAG); } } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index a023b4373835..6081b07479b9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -255,6 +255,9 @@ class AArch64TargetLowering : public TargetLowering { const SelectionDAG &DAG, unsigned Depth = 0) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -508,6 +511,18 @@ class AArch64TargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) 
const; + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + template SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; + template + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index 867074c3c374..71826bec6b11 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -14,6 +14,9 @@ //===---------------------------------- // Atomic fences //===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 16be4432b160..c44daf306ea9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -693,11 +693,11 @@ def addsub_shifted_imm32_neg : addsub_shifted_imm_neg; def addsub_shifted_imm64_neg : addsub_shifted_imm_neg; def gi_addsub_shifted_imm32 : - GIComplexOperandMatcher, + GIComplexOperandMatcher, GIComplexPatternEquiv; def gi_addsub_shifted_imm64 : - GIComplexOperandMatcher, + GIComplexOperandMatcher, GIComplexPatternEquiv; class neg_addsub_shifted_imm diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 41fc8eceab5c..cb268828455e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2320,7 +2320,7 @@ void AArch64InstrInfo::storeRegToStackSlot( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -2424,7 +2424,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -2649,7 +2649,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { - assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -2735,7 +2736,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } if (FillRC) { - assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == + TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); loadRegFromStackSlot(MBB, InsertPt, 
DstReg, FrameIndex, FillRC, &TRI); MachineInstr &LoadMI = *--InsertPt; @@ -3025,7 +3027,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void AArch64InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::createImm(0)); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index bacce441f6c5..4cd14db633b9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -205,7 +205,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 5e01b6cd2b46..b0e0e3eb4ba7 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -41,12 +41,17 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -62,14 +67,19 @@ class AArch64InstructionSelector : public InstructionSelector { bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const; + ComplexRendererFn selectArithImmed(MachineOperand &Root) const; const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ForCodeSize; + + PredicateBitset AvailableFeatures; + PredicateBitset + computeAvailableFeatures(const MachineFunction *MF, + const AArch64Subtarget *Subtarget) const; // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. 
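A note on the ComplexRendererFn signature declared above: instead of writing its results through MachineOperand out-parameters, selectArithImmed now returns a closure that the TableGen-generated matcher invokes to append the rendered operands to the instruction being built. A minimal hand-written sketch of that calling convention, with hypothetical placeholders (Root, MBB, InsertPt, DL, TII, DstReg, SrcReg); the real caller is generated code, not part of this patch:

  if (InstructionSelector::ComplexRendererFn Renderer = selectArithImmed(Root)) {
    // Build an ADD taking a 12-bit shifted immediate; the closure appends the
    // immediate value and the shifter encoding as the trailing operands.
    MachineInstrBuilder MIB =
        BuildMI(MBB, InsertPt, DL, TII.get(AArch64::ADDXri), DstReg).addReg(SrcReg);
    Renderer(MIB);
  }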
@@ -88,7 +98,7 @@ AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -567,6 +577,12 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } +void AArch64InstructionSelector::beginFunction( + const MachineFunction &MF) { + ForCodeSize = MF.getFunction()->optForSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool AArch64InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1312,9 +1328,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. -bool AArch64InstructionSelector::selectArithImmed( - MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const { +InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -1333,13 +1348,13 @@ bool AArch64InstructionSelector::selectArithImmed( else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) - return false; + return nullptr; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) - return false; + return nullptr; Immed = Op1.getCImm()->getZExtValue(); } else - return false; + return nullptr; unsigned ShiftAmt; @@ -1349,14 +1364,10 @@ bool AArch64InstructionSelector::selectArithImmed( ShiftAmt = 12; Immed = Immed >> 12; } else - return false; + return nullptr; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - Result1.ChangeToImmediate(Immed); - Result1.clearParent(); - Result2.ChangeToImmediate(ShVal); - Result2.clearParent(); - return true; + return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); }; } namespace llvm { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index eec089087fe0..cf1c0b66db58 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -79,14 +79,14 @@ def : WriteRes { let Latency = 5; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 4; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 3; let NumMicroOps = 2; } def : WriteRes { let Latency = 2; } diff --git 
a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 4bd77d344488..8f8eeef8a6cf 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -326,6 +326,10 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- +def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; + def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; @@ -421,6 +425,7 @@ def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>; def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>; + // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; @@ -433,7 +438,6 @@ def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; - // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; @@ -461,6 +465,7 @@ def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; + // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -502,28 +507,30 @@ def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; -def : InstRW<[WriteST], (instregex "^LDC.*$")>; -def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; def : InstRW<[WriteVST, WriteVST], 
(instrs STNPQi)>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>; +def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>; +def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>; +def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; +def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td index 9cdb4be4246b..e64b2c441a19 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td @@ -28,7 +28,6 @@ //===----------------------------------------------------------------------===// // Define 1 micro-op types - def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } @@ -175,18 +174,33 @@ def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { //===----------------------------------------------------------------------===// // Define 3 micro-op types +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; let NumMicroOps = 3; } + def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; let NumMicroOps = 3; @@ -196,10 +210,12 @@ def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; @@ -259,6 +275,12 @@ def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let NumMicroOps = 4; } +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + //===----------------------------------------------------------------------===// // Define 5 micro-op types @@ -289,6 +311,13 @@ def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, 
let NumMicroOps = 6; } +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + //===----------------------------------------------------------------------===// // Define 8 micro-op types diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index d7bbc2bcd22c..4dbcc9581a84 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2473,16 +2473,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - auto DB = AArch64DB::lookupDBByName(Tok.getString()); - if (!DB) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && DB->Encoding != AArch64DB::sy) { + auto DB = AArch64DB::lookupDBByName(Tok.getString()); + if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) { TokError("'sy' or #imm operand expected"); return MatchOperand_ParseFail; + } else if (!DB) { + TokError("invalid barrier option name"); + return MatchOperand_ParseFail; } Operands.push_back(AArch64Operand::CreateBarrier( diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 41ae70f85e58..fc89657bffd3 100644 --- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -275,6 +276,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } + if (Opcode == AArch64::CompilerBarrier) { + O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; + printAnnotation(O, Annot); + return; + } + if (!printAliasInstr(MI, STI, O)) printInstruction(MI, STI, O); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 62dfa59483eb..33698d2b8c38 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -565,6 +565,9 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; + } else if (MI.getOpcode() == AArch64::CompilerBarrier) { + // This just prevents the compiler from reordering accesses, no actual code. 
+ return; } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index 2c7a2d8962d0..0f331486d0f8 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -406,7 +406,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode + FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, + FeatureFastFMAF32 ] >; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 318de7f2e3d2..f5110857da84 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -116,8 +116,11 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, - SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr, + SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, + SDValue &Offset) const; + bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE) const; @@ -150,14 +153,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Omod) const; bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; @@ -953,8 +954,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, return true; } +static bool isLegalMUBUFImmOffset(unsigned Imm) { + return isUInt<12>(Imm); +} + static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { - return isUInt<12>(Imm->getZExtValue()); + return isLegalMUBUFImmOffset(Imm->getZExtValue()); } bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, @@ -1076,9 +1081,9 @@ SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { return N; } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, - SDValue &VAddr, SDValue &SOffset, - SDValue &ImmOffset) const { +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, + SDValue &VAddr, SDValue &SOffset, + SDValue &ImmOffset) const { SDLoc DL(Addr); MachineFunction &MF = CurDAG->getMachineFunction(); @@ -1087,8 +1092,22 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); SOffset = 
CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); - // (add n0, c1) + if (ConstantSDNode *CAddr = dyn_cast(Addr)) { + unsigned Imm = CAddr->getZExtValue(); + assert(!isLegalMUBUFImmOffset(Imm) && + "should have been selected by other pattern"); + + SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); + MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + DL, MVT::i32, HighBits); + VAddr = SDValue(MovHighBits, 0); + ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); + return true; + } + if (CurDAG->isBaseWithConstantOffset(Addr)) { + // (add n0, c1) + SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); @@ -1107,6 +1126,24 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, return true; } +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr, + SDValue &SRsrc, + SDValue &SOffset, + SDValue &Offset) const { + ConstantSDNode *CAddr = dyn_cast(Addr); + if (!CAddr || !isLegalMUBUFImmOffset(CAddr)) + return false; + + SDLoc DL(Addr); + MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); + SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); + Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); + return true; +} + bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, @@ -1628,38 +1665,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, return isNoNanSrc(Src); } -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - bool Res = SelectVOP3Mods(In, Src, SrcMods); - return Res && cast(SrcMods)->isNullValue(); +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { + if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) + return false; + + Src = In; + return true; } bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); - - return SelectVOP3Mods(In, Src, SrcMods); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, - SDValue &SrcMods, SDValue &Clamp, - SDValue &Omod) const { - bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); - - return Res && cast(SrcMods)->isNullValue() && - cast(Clamp)->isNullValue() && - cast(Omod)->isNullValue(); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, - SDValue &SrcMods, - SDValue &Omod) const { - // FIXME: Handle Omod - Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return SelectVOP3Mods(In, Src, SrcMods); } @@ -1677,9 +1696,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, Src = In; SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return true; } diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 
c0f336e082bd..e21775e61dd4 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2315,12 +2315,13 @@ static bool simplifyI24(SDNode *Node24, unsigned OpIdx, SelectionDAG &DAG = DCI.DAG; SDValue Op = Node24->getOperand(OpIdx); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI)) + if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) return true; return false; @@ -3361,7 +3362,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); @@ -3436,6 +3437,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ELSE) NODE_NAME_CASE(LOOP) NODE_NAME_CASE(CALL) + NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index d6aa0ba92bf7..13cbfe267932 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -231,6 +231,10 @@ class AMDGPUTargetLowering : public TargetLowering { AMDGPUAS getAMDGPUAS() const { return AMDGPUASI; } + + MVT getFenceOperandTy(const DataLayout &DL) const override { + return MVT::i32; + } }; namespace AMDGPUISD { @@ -244,6 +248,7 @@ enum NodeType : unsigned { // Function call. CALL, + TRAP, // Masked control flow nodes. 
IF, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 56f060984f08..c1706d12a2ea 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -78,6 +78,11 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>; def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>; def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>; +def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", + SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, + [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] +>; + def AMDGPUconstdata_ptr : SDNode< "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<0, iPTR>]> diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index b8d681298dee..4e688ab0b105 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -50,6 +50,16 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; +def u16ImmTarget : AsmOperandClass { + let Name = "U16Imm"; + let RenderMethod = "addImmOperands"; +} + +def s16ImmTarget : AsmOperandClass { + let Name = "S16Imm"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def u32imm : Operand { @@ -58,6 +68,12 @@ def u32imm : Operand { def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = u16ImmTarget; +} + +def s16imm : Operand { + let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = s16ImmTarget; } def u8imm : Operand { diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 14ee1c81f8fa..da247fea7de6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -225,6 +225,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { + if (isVerbose()) + OutStreamer->emitRawComment(" divergent unreachable"); + return; + } + MCInst TmpInst; MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 961f7186f373..70c848f3c7bd 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -479,6 +479,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isSMRDLiteralOffset() const; bool isDPPCtrl() const; bool isGPRIdxMode() const; + bool isS16Imm() const; + bool isU16Imm() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -2836,6 +2838,28 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // s_waitcnt //===----------------------------------------------------------------------===// +static bool +encodeCnt( + const AMDGPU::IsaInfo::IsaVersion ISA, + int64_t &IntVal, + int64_t CntVal, + bool Saturate, + unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) +{ + bool Failed = false; + + IntVal = encode(ISA, IntVal, CntVal); + if (CntVal != decode(ISA, IntVal)) { + if (Saturate) { + IntVal = 
encode(ISA, IntVal, -1); + } else { + Failed = true; + } + } + return Failed; +} + bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { StringRef CntName = Parser.getTok().getString(); int64_t CntVal; @@ -2851,25 +2875,35 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getParser().parseAbsoluteExpression(CntVal)) return true; - if (getLexer().isNot(AsmToken::RParen)) - return true; - - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) - Parser.Lex(); - AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); - if (CntName == "vmcnt") - IntVal = encodeVmcnt(ISA, IntVal, CntVal); - else if (CntName == "expcnt") - IntVal = encodeExpcnt(ISA, IntVal, CntVal); - else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(ISA, IntVal, CntVal); - else - return true; - return false; + bool Failed = true; + bool Sat = CntName.endswith("_sat"); + + if (CntName == "vmcnt" || CntName == "vmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); + } else if (CntName == "expcnt" || CntName == "expcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); + } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); + } + + // To improve diagnostics, do not skip delimiters on errors + if (!Failed) { + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); + } + } + } + + return Failed; } OperandMatchResultTy @@ -3858,6 +3892,14 @@ bool AMDGPUOperand::isGPRIdxMode() const { return isImm() && isUInt<4>(getImm()); } +bool AMDGPUOperand::isS16Imm() const { + return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); +} + +bool AMDGPUOperand::isU16Imm() const { + return isImm() && isUInt<16>(getImm()); +} + OperandMatchResultTy AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); diff --git a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td index a6609f0725ab..89eddb9ce961 100644 --- a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern; def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; -def MUBUFScratch : ComplexPattern; +def MUBUFScratchOffen : ComplexPattern; +def MUBUFScratchOffset : ComplexPattern; + def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; @@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern ; } // End Predicates = [Has16BitInsts] -class MUBUFScratchLoadPat : Pat < - (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr, - i32:$soffset, u16imm:$offset))), - (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchLoadPat { + def : Pat < + (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))), + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; + def : Pat < + (vt (ld (MUBUFScratchOffset v4i32:$srsrc, 
i32:$soffset, u16imm:$offset))), + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) + >; +} + +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; // BUFFER_LOAD_DWORD*, addr64=0 multiclass MUBUF_Load_Dword ; defm : MUBUFStore_Pattern ; -class MUBUFScratchStorePat : Pat < - (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, - u16imm:$offset)), - (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchStorePat { + def : Pat < + (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset)), + (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; + def : Pat < + (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, + u16imm:$offset)), + (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) + >; +} + +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; //===----------------------------------------------------------------------===// // MTBUF Patterns diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 4ecfa118fb27..bf16a8216001 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -83,8 +83,8 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg, const auto RC = MRI.getRegClass(Reg); auto STI = static_cast(MRI.getTargetRegisterInfo()); return STI->isSGPRClass(RC) ? - (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) : - (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE); + (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) : + (STI->getRegSizeInBits(*RC) == 32 ? 
VGPR32 : VGPR_TUPLE); } void GCNRegPressure::inc(unsigned Reg, diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 29a6ab9fbe93..647017d5061d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -286,20 +286,20 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, return ValueKind::Pipe; return StringSwitch(BaseTypeName) + .Case("image1d_t", ValueKind::Image) + .Case("image1d_array_t", ValueKind::Image) + .Case("image1d_buffer_t", ValueKind::Image) + .Case("image2d_t", ValueKind::Image) + .Case("image2d_array_t", ValueKind::Image) + .Case("image2d_array_depth_t", ValueKind::Image) + .Case("image2d_array_msaa_t", ValueKind::Image) + .Case("image2d_array_msaa_depth_t", ValueKind::Image) + .Case("image2d_depth_t", ValueKind::Image) + .Case("image2d_msaa_t", ValueKind::Image) + .Case("image2d_msaa_depth_t", ValueKind::Image) + .Case("image3d_t", ValueKind::Image) .Case("sampler_t", ValueKind::Sampler) .Case("queue_t", ValueKind::Queue) - .Cases("image1d_t", - "image1d_array_t", - "image1d_buffer_t", - "image2d_t" , - "image2d_array_t", - "image2d_array_depth_t", - "image2d_array_msaa_t" - "image2d_array_msaa_depth_t" - "image2d_depth_t", - "image2d_msaa_t", - "image2d_msaa_depth_t", - "image3d_t", ValueKind::Image) .Default(isa(Ty) ? (Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ? diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 6c61fb1f2d6b..2364e7b7b5fb 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -15,6 +15,7 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 
8 : 4; + StackGrowsUp = true; HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MinInstAlignment = 4; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index f9d258f44a62..b0f0bf04a891 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -81,6 +81,11 @@ using namespace llvm; #define DEBUG_TYPE "si-fix-sgpr-copies" +static cl::opt EnableM0Merge( + "amdgpu-enable-merge-m0", + cl::desc("Merge and hoist M0 initializations"), + cl::init(false)); + namespace { class SIFixSGPRCopies : public MachineFunctionPass { @@ -108,7 +113,7 @@ class SIFixSGPRCopies : public MachineFunctionPass { INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) @@ -332,27 +337,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, return true; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - DenseSet Visited; +template +bool searchPredecessors(const MachineBasicBlock *MBB, + const MachineBasicBlock *CutOff, + UnaryPredicate Predicate) { + + if (MBB == CutOff) + return false; + + DenseSet Visited; SmallVector Worklist(MBB->pred_begin(), MBB->pred_end()); while (!Worklist.empty()) { - MachineBasicBlock *mbb = Worklist.back(); - Worklist.pop_back(); + MachineBasicBlock *MBB = Worklist.pop_back_val(); - if (!Visited.insert(mbb).second) + if (!Visited.insert(MBB).second) continue; - if (hasTerminatorThatModifiesExec(*mbb, *TRI)) + if (MBB == CutOff) + continue; + if (Predicate(MBB)) return true; - Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end()); + Worklist.append(MBB->pred_begin(), MBB->pred_end()); } return false; } +static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, + const TargetRegisterInfo *TRI) { + return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { + return hasTerminatorThatModifiesExec(*MBB, *TRI); }); +} + +// Checks if there is potential path From instruction To instruction. +// If CutOff is specified and it sits in between of that path we ignore +// a higher portion of the path and report it is not reachable. +static bool isReachable(const MachineInstr *From, + const MachineInstr *To, + const MachineBasicBlock *CutOff, + MachineDominatorTree &MDT) { + // If either From block dominates To block or instructions are in the same + // block and From is higher. + if (MDT.dominates(From, To)) + return true; + + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + if (MBBFrom == MBBTo) + return false; + + // Instructions are in different blocks, do predecessor search. + // We should almost never get here since we do not usually produce M0 stores + // other than -1. + return searchPredecessors(MBBTo, CutOff, [MBBFrom] + (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); +} + +// Hoist and merge identical SGPR initializations into a common predecessor. +// This is intended to combine M0 initializations, but can work with any +// SGPR. A VGPR cannot be processed since we cannot guarantee vector +// executioon. 
+static bool hoistAndMergeSGPRInits(unsigned Reg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT) { + // List of inits by immediate value. + typedef std::map> InitListMap; + InitListMap Inits; + // List of clobbering instructions. + SmallVector Clobbers; + bool Changed = false; + + for (auto &MI : MRI.def_instructions(Reg)) { + MachineOperand *Imm = nullptr; + for (auto &MO: MI.operands()) { + if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || + (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { + Imm = nullptr; + break; + } else if (MO.isImm()) + Imm = &MO; + } + if (Imm) + Inits[Imm->getImm()].push_front(&MI); + else + Clobbers.push_back(&MI); + } + + for (auto &Init : Inits) { + auto &Defs = Init.second; + + for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) { + MachineInstr *MI1 = *I1; + + for (auto I2 = std::next(I1); I2 != E; ) { + MachineInstr *MI2 = *I2; + + // Check any possible interference + auto intereferes = [&](MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To) -> bool { + + assert(MDT.dominates(&*To, &*From)); + + auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool { + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT); + bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT); + if (!MayClobberFrom && !MayClobberTo) + return false; + if ((MayClobberFrom && !MayClobberTo) || + (!MayClobberFrom && MayClobberTo)) + return true; + // Both can clobber, this is not an interference only if both are + // dominated by Clobber and belong to the same block or if Clobber + // properly dominates To, given that To >> From, so it dominates + // both and located in a common dominator. 
+ return !((MBBFrom == MBBTo && + MDT.dominates(Clobber, &*From) && + MDT.dominates(Clobber, &*To)) || + MDT.properlyDominates(Clobber->getParent(), MBBTo)); + }; + + return (any_of(Clobbers, interferes)) || + (any_of(Inits, [&](InitListMap::value_type &C) { + return C.first != Init.first && any_of(C.second, interferes); + })); + }; + + if (MDT.dominates(MI1, MI2)) { + if (!intereferes(MI2, MI1)) { + DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber() + << " " << *MI2); + MI2->eraseFromParent(); + Defs.erase(I2++); + Changed = true; + continue; + } + } else if (MDT.dominates(MI2, MI1)) { + if (!intereferes(MI1, MI2)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } else { + auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(), + MI2->getParent()); + if (!MBB) { + ++I2; + continue; + } + + MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); + if (!intereferes(MI1, I) && !intereferes(MI2, I)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1 << "and moving from BB#" + << MI2->getParent()->getNumber() << " to BB#" + << I->getParent()->getNumber() << " " << *MI2); + I->getParent()->splice(I, MI2->getParent(), MI2); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } + ++I2; + } + ++I1; + } + } + + if (Changed) + MRI.clearKillFlags(Reg); + + return Changed; +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -485,5 +649,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } } + if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) + hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); + return true; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index abe6af9a6d3f..86e3b37b09e9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -101,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); // We need to insert initialization of the scratch resource descriptor. unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); - if (ScratchRsrcReg == AMDGPU::NoRegister) + if (ScratchRsrcReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchRsrcReg)) return AMDGPU::NoRegister; if (ST.hasSGPRInitBug() || @@ -122,8 +124,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( // We find the resource first because it has an alignment requirement. - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; ArrayRef AllSGPR128s = getAllSGPR128(ST, MF); AllSGPR128s = AllSGPR128s.slice(std::min(static_cast(AllSGPR128s.size()), NumPreloaded)); @@ -143,24 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( return ScratchRsrcReg; } -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +// Shift down registers reserved for the scratch wave offset and stack pointer +// SGPRs. 
+std::pair +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { - unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); - if (ST.hasSGPRInitBug() || - ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) - return ScratchWaveOffsetReg; - - unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); + + // No replacement necessary. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { + assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister); + return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); + } + + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (ST.hasSGPRInitBug()) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -175,26 +185,42 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // register from the list to consider, it means that when this // register is being used for the scratch wave offset and there // are no other free SGPRs, then the value will stay in this register. + // + 1 if stack pointer is used. // ---- - // 13 - if (AllSGPRs.size() < 13) - return ScratchWaveOffsetReg; + // 13 (+1) + unsigned ReservedRegCount = 13; + if (SPReg != AMDGPU::NoRegister) + ++ReservedRegCount; - for (MCPhysReg Reg : AllSGPRs.drop_back(13)) { + if (AllSGPRs.size() < ReservedRegCount) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + + bool HandledScratchWaveOffsetReg = + ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + + for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the // scratch descriptor, since we haven’t added its uses yet. 
- if (!MRI.isPhysRegUsed(Reg)) { - if (!MRI.isAllocatable(Reg) || - TRI->isSubRegisterEq(ScratchRsrcReg, Reg)) - continue; + if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) { + if (!HandledScratchWaveOffsetReg) { + HandledScratchWaveOffsetReg = true; - MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); - MFI->setScratchWaveOffsetReg(Reg); - return Reg; + MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); + MFI->setScratchWaveOffsetReg(Reg); + ScratchWaveOffsetReg = Reg; + } else { + if (SPReg == AMDGPU::NoRegister) + break; + + MRI.replaceRegWith(SPReg, Reg); + MFI->setStackPtrOffsetReg(Reg); + SPReg = Reg; + break; + } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); } void SIFrameLowering::emitPrologue(MachineFunction &MF, @@ -220,18 +246,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned ScratchRsrcReg - = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg - = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); - - if (ScratchRsrcReg == AMDGPU::NoRegister) { - assert(ScratchWaveOffsetReg == AMDGPU::NoRegister); - return; - } - - assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg)); - // We need to do the replacement of the private segment buffer and wave offset // register even if there are no stack objects. There could be stores to undef // or a constant without an associated object. @@ -244,19 +258,49 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit()) emitFlatScratchInit(ST, MF, MBB); + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (SPReg != AMDGPU::NoRegister) { + DebugLoc DL; + int64_t StackSize = MF.getFrameInfo().getStackSize(); + + if (StackSize == 0) { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()); + } else { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()) + .addImm(StackSize * ST.getWavefrontSize()); + } + } + + unsigned ScratchRsrcReg + = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); + + unsigned ScratchWaveOffsetReg; + std::tie(ScratchWaveOffsetReg, SPReg) + = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); + + // It's possible to have uses of only ScratchWaveOffsetReg without + // ScratchRsrcReg if it's only used for the initialization of flat_scratch, + // but the inverse is not true. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister) { + assert(ScratchRsrcReg == AMDGPU::NoRegister); + return; + } + // We need to insert initialization of the scratch resource descriptor. unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); - unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } - bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg); - bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg); + bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg); + bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister && + MRI.isPhysRegUsed(ScratchRsrcReg); // We added live-ins during argument lowering, but since they were not used // they were deleted. We're adding the uses now, so add them back. 
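For scale, the S_ADD_U32 emitted above advances the stack pointer by the frame size times the wavefront size, since scratch is allocated per lane. As an illustrative example, with the usual 64-lane wavefront and a 48-byte frame, the prologue computes SPReg = ScratchWaveOffsetReg + 48 * 64 = ScratchWaveOffsetReg + 3072; a zero-sized frame degenerates to the plain COPY from the scratch wave offset register.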
@@ -469,7 +513,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // this also ensures we shouldn't need a register for the offset when // emergency scavenging. int ScavengeFI = MFI.CreateFixedObject( - AMDGPU::SGPR_32RegClass.getSize(), 0, false); + TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); RS->addScavengingFrameIndex(ScavengeFI); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 1bfc08093da2..7ccd02b3c86a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -49,7 +49,7 @@ class SIFrameLowering final : public AMDGPUFrameLowering { SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index dd867b15b4c7..ce74a7cd8b04 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -287,8 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Custom); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -1644,7 +1644,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset) { - int NumElts = SuperRC->getSize() / 4; + int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32; // Skip out of bounds offsets, or else we would end up using an undefined // register. 
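The register-size rewrites in this import (here in computeIndirectRegAndOffset, and earlier in the AArch64 spill/fill code, GCNRegPressure, and SIInsertWaits) all follow the same pattern: the byte-based TargetRegisterClass::getSize() is replaced by the bit-based TargetRegisterInfo::getRegSizeInBits(), so divisors and case labels scale by 8. A minimal equivalence sketch, assuming RC is a register class and TRI its TargetRegisterInfo:

  // Old byte-based form (API removed by this change) vs. new bit-based form;
  // both compute the number of 32-bit registers covered by the class.
  unsigned DWordsOld = RC->getSize() / 4;                // bytes / 4
  unsigned DWordsNew = TRI->getRegSizeInBits(*RC) / 32;  // bits / 32
  assert(DWordsOld == DWordsNew);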
@@ -1793,17 +1793,18 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, return LoopBB; } -static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) { - switch (VecRC->getSize()) { - case 4: +static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI, + const TargetRegisterClass *VecRC) { + switch (TRI.getRegSizeInBits(*VecRC)) { + case 32: // 4 bytes return AMDGPU::V_MOVRELD_B32_V1; - case 8: + case 64: // 8 bytes return AMDGPU::V_MOVRELD_B32_V2; - case 16: + case 128: // 16 bytes return AMDGPU::V_MOVRELD_B32_V4; - case 32: + case 256: // 32 bytes return AMDGPU::V_MOVRELD_B32_V8; - case 64: + case 512: // 64 bytes return AMDGPU::V_MOVRELD_B32_V16; default: llvm_unreachable("unsupported size for MOVRELD pseudos"); @@ -1863,7 +1864,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(MBB, I, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1907,7 +1908,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, .addReg(PhiReg, RegState::Implicit) .addReg(AMDGPU::M0, RegState::Implicit); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(*LoopBB, InsPt, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1948,50 +1949,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( } switch (MI.getOpcode()) { - case AMDGPU::S_TRAP_PSEUDO: { - const DebugLoc &DL = MI.getDebugLoc(); - const int TrapType = MI.getOperand(0).getImm(); - - if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && - Subtarget->isTrapHandlerEnabled()) { - - MachineFunction *MF = BB->getParent(); - SIMachineFunctionInfo *Info = MF->getInfo(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); - - if (!BB->isLiveIn(UserSGPR)) - BB->addLiveIn(UserSGPR); - - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) - .addReg(UserSGPR); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)) - .addImm(TrapType) - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); - } else { - switch (TrapType) { - case SISubtarget::TrapIDLLVMTrap: - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM)); - break; - case SISubtarget::TrapIDLLVMDebugTrap: { - DiagnosticInfoUnsupported NoTrap(*MF->getFunction(), - "debugtrap handler not supported", - DL, - DS_Warning); - LLVMContext &C = MF->getFunction()->getContext(); - C.diagnose(NoTrap); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - break; - } - default: - llvm_unreachable("unsupported trap handler type!"); - } - } - - MI.eraseFromParent(); - return BB; - } case AMDGPU::SI_INIT_M0: BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) @@ -2163,6 +2120,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); + + case ISD::TRAP: + case ISD::DEBUGTRAP: + return lowerTRAP(Op, DAG); } return SDValue(); } @@ -2431,6 +2392,57 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; } +SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + 
SDValue Chain = Op.getOperand(0); + + unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ? + SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap; + + if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && + Subtarget->isTrapHandlerEnabled()) { + SIMachineFunctionInfo *Info = MF.getInfo(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + + SDValue QueuePtr = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + + SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); + + SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, + QueuePtr, SDValue()); + + SDValue Ops[] = { + ToReg, + DAG.getTargetConstant(TrapID, SL, MVT::i16), + SGPR01, + ToReg.getValue(1) + }; + + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); + } + + switch (TrapID) { + case SISubtarget::TrapIDLLVMTrap: + return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); + case SISubtarget::TrapIDLLVMDebugTrap: { + DiagnosticInfoUnsupported NoTrap(*MF.getFunction(), + "debugtrap handler not supported", + Op.getDebugLoc(), + DS_Warning); + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.diagnose(NoTrap); + return Chain; + } + default: + llvm_unreachable("unsupported trap handler type!"); + } + + return Chain; +} + SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, SelectionDAG &DAG) const { // FIXME: Use inline constants (src_{shared, private}_base) instead. @@ -3410,9 +3422,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, EVT VT = Op.getValueType(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) + return SDValue(); + if (const ConstantFPSDNode *CLHS = dyn_cast(LHS)) { - if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - VT == MVT::f16) { + if (Unsafe || VT == MVT::f32 || VT == MVT::f16) { if (CLHS->isExactlyValue(1.0)) { // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to // the CI documentation has a worst case error of 1 ulp. 
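In short, trap handling moves from the custom-inserted S_TRAP_PSEUDO to ISel-time lowering: when the HSA trap handler ABI is enabled, the queue pointer is copied into SGPR0_SGPR1 and an AMDGPUISD::TRAP node carrying the trap ID is emitted (selected to S_TRAP by the pattern change in SIInstructions.td further below); otherwise the previous fallback behaviour is kept. A sketch of that fallback, written as C++ comments and not taken from the patch:

    // call void @llvm.trap()      -> AMDGPUISD::ENDPGM  -> s_endpgm
    // call void @llvm.debugtrap() -> "debugtrap handler not supported" warning;
    //                                the chain is returned unchanged, so no code is emitted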
@@ -4696,7 +4710,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Src, Demanded) || + if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c2a3e62aa827..9122cd72d323 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -428,8 +428,8 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI, const MachineInstr &MIA = *MI; const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo); - unsigned Size = RC->getSize(); - Result.second = Result.first + (Size / 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.second = Result.first + (Size / 32); return Result; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 47257ce16ceb..9f32ecfa52ff 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -216,8 +216,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // XXX - What if this is a write into a super register? const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); - unsigned Size = RC->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.Named.LGKM = Size > 32 ? 2 : 1; } else { // s_dcache_inv etc. do not have a a destination register. Assume we // want a wait on these. @@ -289,12 +289,12 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, const MachineOperand &Reg) const { - unsigned Size = RC->getSize(); - assert(Size >= 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + assert(Size >= 32); RegInterval Result; Result.first = TRI->getEncodingValue(Reg.getReg()); - Result.second = Result.first + Size / 4; + Result.second = Result.first + Size / 32; return Result; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 05ac67d26620..92e452a3d6a0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -138,6 +138,11 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, } if (isSMRD(Opc0) && isSMRD(Opc1)) { + // Skip time and cache invalidation instructions. + if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || + AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) + return false; + assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); // Check base reg. 
@@ -245,11 +250,11 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, unsigned EltSize; if (LdSt.mayLoad()) - EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; else { assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; } if (isStride64(Opc)) @@ -345,7 +350,7 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); - return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; + return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, @@ -433,7 +438,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { - if (RC->getSize() > 4) { + if (RI.getRegSizeInBits(*RC) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { @@ -493,11 +498,11 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const { unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { - if (DstRC->getSize() == 4) { + if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { return AMDGPU::S_MOV_B64; - } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } return AMDGPU::COPY; @@ -557,17 +562,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); + unsigned SpillSize = TRI->getSpillSize(*RC); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. 
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -602,7 +608,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data @@ -660,6 +666,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); unsigned Size = FrameInfo.getObjectSize(FrameIndex); + unsigned SpillSize = TRI->getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); @@ -670,8 +677,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); + if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -701,7 +708,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc @@ -1440,9 +1447,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); - unsigned DstSize = DstRC->getSize(); + unsigned DstSize = RI.getRegSizeInBits(*DstRC); - if (DstSize == 4) { + if (DstSize == 32) { unsigned SelOp = Pred == SCC_TRUE ? AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; @@ -1456,7 +1463,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, return; } - if (DstSize == 8 && Pred == SCC_TRUE) { + if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) .addReg(FalseReg) @@ -1483,7 +1490,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; const int16_t *SubIndices = Sub0_15; - int NElts = DstSize / 4; + int NElts = DstSize / 32; // 64-bit select is only avaialble for SALU. if (Pred == SCC_TRUE) { @@ -2635,6 +2642,19 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) return; + // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for + // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane + // select is uniform. 
+ if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && + RI.isVGPR(MRI, Src1.getReg())) { + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src1); + Src1.ChangeToRegister(Reg, false); + return; + } + // We do not use commuteInstruction here because it is too aggressive and will // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste @@ -2729,7 +2749,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); - unsigned SubRegs = VRC->getSize() / 4; + unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; SmallVector SRegs; for (unsigned i = 0; i < SubRegs; ++i) { @@ -3595,7 +3615,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, .addImm(16) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) - .addImm(0xffff); + .addImm(0xffff0000); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg) .add(Src1) .addReg(ImmReg, RegState::Kill) diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 659473ca6a47..03a5ef74b179 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -626,13 +626,13 @@ class SIInstrInfo final : public AMDGPUInstrInfo { return 4; } - return RI.getRegClass(OpInfo.RegClass)->getSize(); + return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; } /// \brief This form should usually be preferred since it handles operands /// with unknown register classes. unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { - return getOpRegClass(MI, OpNo)->getSize(); + return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; } /// \returns true if it is legal for the operand at index \p OpNo diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c6daf743f3ac..7b052844f177 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -646,11 +646,10 @@ def DS64Bit4ByteAligned : ComplexPattern; def MOVRELOffset : ComplexPattern; def VOP3Mods0 : ComplexPattern; -def VOP3NoMods0 : ComplexPattern; def VOP3Mods0Clamp : ComplexPattern; def VOP3Mods0Clamp0OMod : ComplexPattern; def VOP3Mods : ComplexPattern; -def VOP3NoMods : ComplexPattern; +def VOP3NoMods : ComplexPattern; // VOP3Mods, but the input source is known to never be NaN. 
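The V_READLANE_B32 special case added above can be pictured as the following rewrite (virtual register names are illustrative):

    // before:  %dst = V_READLANE_B32 %src:vgpr_32, %lane:vgpr_32      // VGPR lane select is illegal
    // after:   %tmp:sreg_32_xm0 = V_READFIRSTLANE_B32 %lane:vgpr_32   // assumes the lane select is uniform
    //          %dst = V_READLANE_B32 %src:vgpr_32, %tmp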
def VOP3Mods_nnan : ComplexPattern; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index 2f89503e129a..3f6ddec70479 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -94,6 +94,12 @@ defm V_INTERP_MOV_F32 : VINTRP_m < //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// +def ATOMIC_FENCE : SPseudoInstSI< + (outs), (ins i32imm:$ordering, i32imm:$scope), + [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))], + "ATOMIC_FENCE $ordering, $scope"> { + let hasSideEffects = 1; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { @@ -111,12 +117,6 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> { - let hasSideEffects = 1; - let SALU = 1; - let usesCustomInserter = 1; -} - let usesCustomInserter = 1, SALU = 1 in { def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins), [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>; @@ -400,13 +400,8 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < let Predicates = [isGCN] in { def : Pat< - (trap), - (S_TRAP_PSEUDO TRAPID.LLVM_TRAP) ->; - -def : Pat< - (debugtrap), - (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP) + (AMDGPUtrap timm:$trapid), + (S_TRAP $trapid) >; def : Pat< @@ -477,8 +472,8 @@ def : Pat < // fp_to_fp16 patterns def : Pat < - (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))), - (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod) + (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; def : Pat < @@ -507,11 +502,11 @@ def : Pat < multiclass FMADPat { def : Pat < - (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3NoMods vt:$src1, i32:$src1_modifiers), - (VOP3NoMods vt:$src2, i32:$src2_modifiers))), - (inst $src0_modifiers, $src0, $src1_modifiers, $src1, - $src2_modifiers, $src2, $clamp, $omod) + (vt (fmad (VOP3NoMods vt:$src0), + (VOP3NoMods vt:$src1), + (VOP3NoMods vt:$src2))), + (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; } @@ -681,10 +676,9 @@ def : BitConvert ; // If denormals are not enabled, it only impacts the compare of the // inputs. The output result is not flushed. 
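The new ATOMIC_FENCE pseudo keeps the IR fence's ordering and synchronization scope as immediate operands so a later pass can expand or drop it. Roughly, assuming the AtomicOrdering/SynchronizationScope encodings of the time (sketch, not taken from the patch):

    // IR:    fence seq_cst
    // ISel:  ATOMIC_FENCE 7, 1   // 7 = SequentiallyConsistent, 1 = CrossThread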
class ClampPat : Pat < - (vt (AMDGPUclamp - (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))), + (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))), (inst i32:$src0_modifiers, vt:$src0, - i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod) + i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE) >; def : ClampPat; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 8e612d2ddfda..b6a982aee6be 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -25,6 +25,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) TIDReg(AMDGPU::NoRegister), ScratchRSrcReg(AMDGPU::NoRegister), ScratchWaveOffsetReg(AMDGPU::NoRegister), + FrameOffsetReg(AMDGPU::NoRegister), + StackPtrOffsetReg(AMDGPU::NoRegister), PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), DispatchPtrUserSGPR(AMDGPU::NoRegister), QueuePtrUserSGPR(AMDGPU::NoRegister), diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 810fb05984c4..dc9f509e60ae 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -88,6 +88,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // This is the current function's incremented size from the kernel's scratch + // wave offset register. For an entry function, this is exactly the same as + // the ScratchWaveOffsetReg. + unsigned FrameOffsetReg; + + // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. + unsigned StackPtrOffsetReg; + // Input registers for non-HSA ABI unsigned PrivateMemoryPtrUserSGPR; @@ -364,9 +372,25 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return ScratchWaveOffsetReg; } + unsigned getFrameOffsetReg() const { + return FrameOffsetReg; + } + + void setStackPtrOffsetReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + StackPtrOffsetReg = Reg; + } + + unsigned getStackPtrOffsetReg() const { + return StackPtrOffsetReg; + } + void setScratchWaveOffsetReg(unsigned Reg) { assert(Reg != AMDGPU::NoRegister && "Should never be unset"); ScratchWaveOffsetReg = Reg; + + // FIXME: Only for entry functions. + FrameOffsetReg = ScratchWaveOffsetReg; } unsigned getQueuePtrUserSGPR() const { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 098c67252dd8..8820e294562b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -146,6 +146,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); + // M0 has to be reserved so that llvm accepts it as a live-in into a block. + reserveRegisterTuples(Reserved, AMDGPU::M0); + // Reserve the memory aperture registers. reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); @@ -615,7 +618,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. 
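The two new SIMachineFunctionInfo fields above are bookkeeping only at this point; per the FIXME they are expected to be used roughly like this for entry functions (register names are illustrative, not from the patch):

    Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg); // also seeds FrameOffsetReg for now
    Info->setStackPtrOffsetReg(StackPtrReg);             // SGPR holding the top-of-stack offset
    unsigned FrameReg = Info->getFrameOffsetReg();       // equals ScratchWaveOffsetReg for entry functions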
- std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true); + std::tie(EltSize, ScalarStoreOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, true); } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); @@ -775,7 +779,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. - std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false); + std::tie(EltSize, ScalarLoadOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, false); } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); @@ -1038,20 +1043,21 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { // TODO: It might be helpful to have some target specific flags in // TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { - switch (RC->getSize()) { - case 0: return false; - case 1: return false; - case 4: - return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; - case 8: - return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; - case 12: - return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; - case 16: - return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; + unsigned Size = getRegSizeInBits(*RC); + if (Size < 32) + return false; + switch (Size) { case 32: - return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; + return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; case 64: + return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; + case 96: + return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; + case 128: + return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; + case 256: + return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; + case 512: return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; default: llvm_unreachable("Invalid register class size"); @@ -1060,18 +1066,18 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SRC) const { - switch (SRC->getSize()) { - case 4: - return &AMDGPU::VGPR_32RegClass; - case 8: - return &AMDGPU::VReg_64RegClass; - case 12: - return &AMDGPU::VReg_96RegClass; - case 16: - return &AMDGPU::VReg_128RegClass; + switch (getRegSizeInBits(*SRC)) { case 32: - return &AMDGPU::VReg_256RegClass; + return &AMDGPU::VGPR_32RegClass; case 64: + return &AMDGPU::VReg_64RegClass; + case 96: + return &AMDGPU::VReg_96RegClass; + case 128: + return &AMDGPU::VReg_128RegClass; + case 256: + return &AMDGPU::VReg_256RegClass; + case 512: return &AMDGPU::VReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1080,16 +1086,16 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( const TargetRegisterClass *VRC) const { - switch (VRC->getSize()) { - case 4: - return &AMDGPU::SGPR_32RegClass; - case 8: - return &AMDGPU::SReg_64RegClass; - case 16: - return &AMDGPU::SReg_128RegClass; + switch (getRegSizeInBits(*VRC)) { case 32: - return &AMDGPU::SReg_256RegClass; + return &AMDGPU::SGPR_32RegClass; case 64: + return &AMDGPU::SReg_64RegClass; + case 128: + return &AMDGPU::SReg_128RegClass; + case 256: + return &AMDGPU::SReg_256RegClass; + case 512: return 
&AMDGPU::SReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1354,15 +1360,15 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC) const { - unsigned SrcSize = SrcRC->getSize(); - unsigned DstSize = DstRC->getSize(); - unsigned NewSize = NewRC->getSize(); + unsigned SrcSize = getRegSizeInBits(*SrcRC); + unsigned DstSize = getRegSizeInBits(*DstRC); + unsigned NewSize = getRegSizeInBits(*NewRC); // Do not increase size of registers beyond dword, we would need to allocate // adjacent registers and constraint regalloc more than needed. // Always allow dword coalescing. - if (SrcSize <= 4 || DstSize <= 4) + if (SrcSize <= 32 || DstSize <= 32) return true; return NewSize <= DstSize || NewSize <= SrcSize; diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td index b4adbdd1df07..593439c2a3cd 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -530,14 +530,16 @@ class SOPKInstTable { class SOPK_32 pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins u16imm:$simm16), + (ins s16imm:$simm16), "$sdst, $simm16", pattern>; -class SOPK_SCC : SOPK_Pseudo < +class SOPK_SCC : SOPK_Pseudo < opName, (outs), - (ins SReg_32:$sdst, u16imm:$simm16), + !if(isSignExt, + (ins SReg_32:$sdst, s16imm:$simm16), + (ins SReg_32:$sdst, u16imm:$simm16)), "$sdst, $simm16", []>, SOPKInstTable<1, base_op>{ let Defs = [SCC]; @@ -546,7 +548,7 @@ class SOPK_SCC : SOPK_Pseudo < class SOPK_32TIE pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins SReg_32:$src0, u16imm:$simm16), + (ins SReg_32:$src0, s16imm:$simm16), "$sdst, $simm16", pattern >; @@ -575,20 +577,20 @@ let isCompare = 1 in { // [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] // >; -def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">; -def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">; -def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">; -def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">; -def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">; -def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">; +def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32", 1>; +def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32", 1>; +def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32", 1>; +def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32", 1>; +def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32", 1>; +def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32", 1>; let SOPKZext = 1 in { -def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">; -def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">; -def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">; -def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">; -def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">; -def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">; +def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32", 0>; +def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32", 0>; +def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32", 0>; +def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32", 0>; +def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32", 0>; +def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32", 0>; } // End SOPKZext = 1 } // 
End isCompare = 1 @@ -600,7 +602,7 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", def S_CBRANCH_I_FORK : SOPK_Pseudo < "s_cbranch_i_fork", - (outs), (ins SReg_64:$sdst, u16imm:$simm16), + (outs), (ins SReg_64:$sdst, s16imm:$simm16), "$sdst, $simm16" >; diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 86095a8e1142..5a3242bed1d0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -93,6 +93,12 @@ unsigned getVmcntBitWidthHi() { return 2; } } // end namespace anonymous namespace llvm { + +static cl::opt EnablePackedInlinableLiterals( + "enable-packed-inlinable-literals", + cl::desc("Enable packed inlinable literals (v2f16, v2i16)"), + cl::init(false)); + namespace AMDGPU { namespace IsaInfo { @@ -703,6 +709,9 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { assert(HasInv2Pi); + if (!EnablePackedInlinableLiterals) + return false; + int16_t Lo16 = static_cast(Literal); int16_t Hi16 = static_cast(Literal >> 16); return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4f5711ca9a79..5c9d589e2625 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -905,7 +905,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::STRi12)) @@ -1103,7 +1103,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index faf1c631a3a7..28c407f74125 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -105,10 +105,6 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { // Return whether the target has an explicit NOP encoding. bool hasNOP() const; - virtual void getNoopForElfTarget(MCInst &NopInst) const { - getNoopForMachoTarget(NopInst); - } - // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 70a44eaaceb8..a20887564f44 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -806,7 +806,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!DstSubReg) return true; // Small registers don't frequently cause a problem, so we can coalesce them. 
- if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 && + getRegSizeInBits(*SrcRC) < 256) return true; auto NewRCWeight = diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 94b317a8f986..13fb30767c9f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -35,7 +35,8 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { EVT VT = TLI.getValueType(DL, T, true); - if (!VT.isSimple() || VT.isVector()) + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e0aecff2633b..78a9144bd321 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -661,7 +661,6 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { return false; case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_Placeholder: llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 70dbe1bc5b95..4f7a0ab4e220 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1960,10 +1960,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. assert(RS && "Register scavenging not provided"); - const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &RC = ARM::GPRRegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 165e9b7378c7..382f881f7741 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3358,8 +3358,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *ScopeN = cast(Op.getOperand(2)); + auto Scope = static_cast(ScopeN->getZExtValue()); + if (Scope == SynchronizationScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -9476,8 +9480,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. 
Also make sure + // we're using the entire input vector, otherwise there's a size/legality + // mismatch somewhere. + if (nextIndex != Vec.getValueType().getVectorNumElements() || + Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 3b3606ef462a..a0e2ac4cbc6f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -32,8 +32,8 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoop(MCInst &NopInst) const { if (hasNOP()) { NopInst.setOpcode(ARM::HINT); NopInst.addOperand(MCOperand::createImm(0)); diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h index 4b1b7097b18d..c87fb97448c9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 703e8071b177..9d8ee5c3f9dc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -5975,3 +5975,10 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index f2f426e86701..8048c758e998 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -953,7 +953,7 @@ let isAdd = 1 in { /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { - let isCommutable = 1 in + let isCommutable = 1, Uses = [CPSR] in def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, @@ -1292,6 +1292,7 @@ def tSUBrr : // A8.6.212 /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. 
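Together with the LowerATOMIC_FENCE change above, a single-thread fence on ARM no longer produces a DMB: the node is left to normal selection and is matched by the new CompilerBarrier pseudo, which has Size = 0 and only prints "@ COMPILER BARRIER", so it constrains compiler reordering without emitting an instruction. Sketch (C++ comments, not part of the patch):

    // IR:   fence singlethread seq_cst
    // ISel: LowerATOMIC_FENCE sees SingleThread scope and returns Op unchanged;
    //       the (atomic_fence imm:$ordering, 0) pattern then selects CompilerBarrier.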
let hasPostISelHook = 1, Defs = [CPSR] in { + let Uses = [CPSR] in def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 816596b85721..1c13d51a468e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -47,12 +47,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); + // We use copies for trunc, so it's ok for the size of the destination to be + // smaller (the higher bits will just be undefined). + assert(DstSize <= SrcSize && "Copy with different width?!"); assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && @@ -294,6 +291,28 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } break; } + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_ADD: case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); @@ -313,6 +332,16 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; + case G_SDIV: + assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); + I.setDesc(TII.get(ARM::SDIV)); + MIB.add(predOps(ARMCC::AL)); + break; + case G_UDIV: + assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation"); + I.setDesc(TII.get(ARM::UDIV)); + MIB.add(predOps(ARMCC::AL)); + break; case G_FADD: if (!selectFAdd(MIB, TII, MRI)) return false; @@ -332,6 +361,18 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + + auto &Val = I.getOperand(1); + if (Val.isCImm()) { + if (Val.getCImm()->getBitWidth() > 32) + return false; + Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); + } + + if (!Val.isImm()) { + return false; + } + break; } case G_STORE: diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index fe9681439e6b..9b86030fdd29 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -13,6 +13,8 @@ #include "ARMLegalizerInfo.h" #include "ARMSubtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" 
@@ -47,6 +49,18 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (auto Ty : {s1, s8, s16, s32}) setAction({Op, Ty}, Legal); + for (unsigned Op : {G_SDIV, G_UDIV}) { + for (auto Ty : {s8, s16}) + // FIXME: We need WidenScalar here, but in the case of targets with + // software division we'll also need Libcall afterwards. Treat as Custom + // until we have better support for chaining legalization actions. + setAction({Op, Ty}, Custom); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Legal); + else + setAction({Op, s32}, Libcall); + } + for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); for (auto Ty : {s1, s8, s16}) @@ -75,3 +89,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } + +bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + using namespace TargetOpcode; + + switch (MI.getOpcode()) { + default: + return false; + case G_SDIV: + case G_UDIV: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8)) + return false; + + // We need to widen to 32 bits and then maybe, if the target requires, + // transform into a libcall. + LegalizerHelper Helper(MIRBuilder.getMF()); + + MachineInstr *NewMI = nullptr; + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Store the new, 32-bit div instruction. + if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV) + NewMI = MI; + }); + + auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32)); + Helper.MIRBuilder.stopRecordingInsertions(); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + assert(NewMI && "Couldn't find widened instruction"); + assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) && + "Unexpected widened instruction"); + assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 && + "Unexpected type for the widened instruction"); + + Result = Helper.legalizeInstrStep(*NewMI); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + return true; + } + } +} diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h index 0b8a608a6bde..a9bdd367737e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -24,6 +24,9 @@ class ARMSubtarget; class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); + + bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; }; } // End llvm namespace. 
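The Custom action above chains two legalization steps for 8- and 16-bit divisions: widen to 32 bits, then either keep the in-core SDIV/UDIV or turn it into a libcall. Conceptually, for the signed case (MIR-flavoured sketch written as C++ comments; names are illustrative):

    // %r8  = G_SDIV %a8, %b8              // s8: Custom
    // after widenScalar to s32:
    // %a32 = G_SEXT %a8 ; %b32 = G_SEXT %b8
    // %r32 = G_SDIV %a32, %b32            // Legal with hasDivideInARMMode(), Libcall otherwise
    // %r8  = G_TRUNC %r32                 // selected as a plain COPY by the new G_TRUNC case above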
#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 0fd98268723a..9e9c1ba6c114 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -211,11 +211,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) .addImm(ARMCC::AL).addReg(0)); MCInst Noop; - Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + Subtarget->getInstrInfo()->getNoop(Noop); for (int8_t I = 0; I < NoopsInSledCount; I++) - { OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); - } OutStreamer->EmitLabel(Target); recordSled(CurSled, MI, Kind); diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index e47bd3a8963e..7325817d446b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -221,8 +221,11 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_ADD: case G_SUB: case G_MUL: + case G_SDIV: + case G_UDIV: case G_SEXT: case G_ZEXT: + case G_TRUNC: case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 27bff4d75acf..0ebf55924647 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,8 +24,8 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void Thumb1InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tMOVr); NopInst.addOperand(MCOperand::createReg(ARM::R8)); NopInst.addOperand(MCOperand::createReg(ARM::R8)); diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 931914ad2799..e8d9a9c4ff14 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 818ba85c7d40..2e2dfe035e26 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -32,8 +32,8 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. 
+void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tHINT); NopInst.addOperand(MCOperand::createImm(0)); NopInst.addOperand(MCOperand::createImm(ARMCC::AL)); diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 15d63300b6a2..c834ba73bfea 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp index 50bb50b44f27..d6491ce5c3bf 100644 --- a/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp @@ -112,7 +112,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const AVRSubtarget &STI = MF->getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - unsigned BytesPerReg = TRI.getMinimalPhysRegClass(Reg)->getSize(); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8; assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported."); unsigned RegIdx = ByteNumber / BytesPerReg; diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 13080a5d72f0..540e05a92997 100644 --- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -88,6 +88,9 @@ class AVRExpandPseudo : public MachineFunctionPass { unsigned ArithOpcode, Block &MBB, BlockIt MBBI); + + /// Scavenges a free GPR8 register for use. + unsigned scavengeGPR8(MachineInstr &MI); }; char AVRExpandPseudo::ID = 0; @@ -577,24 +580,43 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDRdPtr; OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same"); + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + + // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstLoReg, RegState::Define) .addReg(SrcReg); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + + // Load high byte. 
auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstHiReg, RegState::Define) .addReg(SrcReg, getKillRegState(SrcIsKill)) .addImm(1); + if (TmpReg) { + // Move the high byte into the final destination. + buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); + + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); + } + MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -669,9 +691,9 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDDRdPtrQ; OpHi = AVR::LDDRdPtrQ; @@ -679,60 +701,35 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { assert(Imm <= 63 && "Offset is out of range"); - MachineInstr *MIBLO, *MIBHI; + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); - // HACK: We shouldn't have instances of this instruction - // where src==dest because the instruction itself is - // marked earlyclobber. We do however get this instruction when - // loading from stack slots where the earlyclobber isn't useful. - // - // In this case, just use a temporary register. - if (DstReg == SrcReg) { - RegScavenger RS; + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; - RS.enterBasicBlock(MBB); - RS.forward(MBBI); + // Load low byte. + auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg) + .addImm(Imm); - BitVector Candidates = - TRI->getAllocatableSet - (*MBB.getParent(), &AVR::GPR8RegClass); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); - // Exclude all the registers being used by the instruction. - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); - } - - BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); - Available &= Candidates; - - signed TmpReg = Available.find_first(); - assert(TmpReg != -1 && "ran out of registers"); - - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg) - .addImm(Imm); - - buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstLoReg).addReg(TmpReg); - - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Load high byte. + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(CurDstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(Imm + 1); + if (TmpReg) { + // Move the high byte into the final destination. 
buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); - } else { - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg) - .addImm(Imm); - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); } MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -819,6 +816,32 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width, }); } +unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + RegScavenger RS; + + RS.enterBasicBlock(MBB); + RS.forward(MI); + + BitVector Candidates = + TRI->getAllocatableSet + (*MBB.getParent(), &AVR::GPR8RegClass); + + // Exclude all the registers being used by the instruction. + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + Candidates.reset(MO.getReg()); + } + + BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); + Available &= Candidates; + + signed Reg = Available.find_first(); + assert(Reg != -1 && "ran out of registers"); + return Reg; +} + template<> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI); @@ -948,7 +971,6 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { unsigned OpLo, OpHi, SrcLoReg, SrcHiReg; unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsKill = MI.getOperand(0).isKill(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::STPtrRr; OpHi = AVR::STDPtrQRr; @@ -960,7 +982,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { .addReg(SrcLoReg, getKillRegState(SrcIsKill)); auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg) .addImm(1) .addReg(SrcHiReg, getKillRegState(SrcIsKill)); diff --git a/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp index b8cb2215ddb4..ab42a7aa9901 100644 --- a/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -239,7 +239,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters( unsigned Reg = CSI[i - 1].getReg(); bool IsNotLiveIn = !MBB.isLiveIn(Reg); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); // Add the callee-saved register as live-in only if it is not already a @@ -277,7 +277,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( for (const CalleeSavedInfo &CCSI : CSI) { unsigned Reg = CCSI.getReg(); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); BuildMI(MBB, MI, DL, TII.get(AVR::POPRd), Reg); diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 88f889260cce..afba66b2e69b 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -142,9 +142,9 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if 
(TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::STDPtrQRr; - } else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { Opcode = AVR::STDWPtrQRr; } else { llvm_unreachable("Cannot store this register into a stack slot!"); @@ -176,9 +176,9 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::LDDRdPtrQ; - } else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { // Opcode = AVR::LDDWRdPtrQ; //:FIXME: remove this once PR13375 gets fixed Opcode = AVR::LDDWRdYQ; diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp index 48798bd4a1da..5cc7eaf8add3 100644 --- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp @@ -78,11 +78,12 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetRegisterClass * AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - if (RC->hasType(MVT::i16)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { return &AVR::DREGSRegClass; } - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { return &AVR::GPR8RegClass; } diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index cb3049bf1500..07767d1037a9 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -347,7 +347,7 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); - uint16_t BW = RC->getSize()*8; + uint16_t BW = TRI.getRegSizeInBits(*RC); return BW; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index fda23f8f6b05..c8483f7e6e76 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -286,9 +286,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); const MachineFunction &MF = *MI.getParent()->getParent(); const auto &HST = MF.getSubtarget(); - unsigned VectorSize = HST.useHVXSglOps() - ? Hexagon::VectorRegsRegClass.getSize() - : Hexagon::VectorRegs128BRegClass.getSize(); + const auto &VecRC = HST.useHVXSglOps() ? 
Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned VectorSize = HST.getRegisterInfo()->getSpillSize(VecRC); switch (Inst.getOpcode()) { default: return; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 61f290ca98d7..8502bf24c02f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -407,7 +407,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) { Begin = 0; - Width = RC->getSize()*8; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); return true; } @@ -417,7 +417,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, case Hexagon::DoubleRegsRegClassID: case Hexagon::VecDblRegsRegClassID: case Hexagon::VecDblRegs128BRegClassID: - Width = RC->getSize()*8 / 2; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) Begin = Width; break; @@ -1054,8 +1054,8 @@ namespace { class RedundantInstrElimination : public Transformation { public: RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1070,6 +1070,7 @@ namespace { bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; }; @@ -1262,7 +1263,7 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, assert(MI.getOperand(OpN).isReg()); BitTracker::RegisterRef RR = MI.getOperand(OpN); const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); - uint16_t Width = RC->getSize()*8; + uint16_t Width = HRI.getRegSizeInBits(*RC); if (!GotBits) T.set(Begin, Begin+Width); @@ -2173,8 +2174,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, const RegisterSet &AVs) { if (!GenBitSplit) return false; - if (CountBitSplit >= MaxBitSplit) - return false; + if (MaxBitSplit.getNumOccurrences()) { + if (CountBitSplit >= MaxBitSplit) + return false; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2253,7 +2256,8 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, continue; // Generate bitsplit where S is defined. - CountBitSplit++; + if (MaxBitSplit.getNumOccurrences()) + CountBitSplit++; MachineInstr *DefS = MRI.getVRegDef(S); assert(DefS != nullptr); DebugLoc DL = DefS->getDebugLoc(); @@ -2379,9 +2383,11 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, const RegisterSet &AVs) { if (!GenExtract) return false; - if (CountExtract >= MaxExtract) - return false; - CountExtract++; + if (MaxExtract.getNumOccurrences()) { + if (CountExtract >= MaxExtract) + return false; + CountExtract++; + } unsigned W = RC.width(); unsigned RW = W; @@ -2651,7 +2657,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { Changed |= visitBlock(Entry, ImmG, AIG); RegisterSet ARE; // Available registers for RIE. 
- RedundantInstrElimination RIE(BT, HII, MRI); + RedundantInstrElimination RIE(BT, HII, HRI, MRI); bool Ried = visitBlock(Entry, RIE, ARE); if (Ried) { Changed = true; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index d8ba5dcd35ad..9f8c9ded8127 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -559,10 +559,10 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, } unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); - switch (RC->getSize()) { - case 4: + switch (TRI->getRegSizeInBits(*RC)) { + case 32: return IfTrue ? A2_tfrt : A2_tfrf; - case 8: + case 64: return IfTrue ? A2_tfrpt : A2_tfrpf; } llvm_unreachable("Invalid register operand"); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0e2380f4316a..a04aca4afa0f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1425,7 +1425,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, if (!SRegs[S->Reg]) continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset); + int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset); MinOffset = std::min(MinOffset, S->Offset); CSI.push_back(CalleeSavedInfo(S->Reg, FI)); SRegs[S->Reg] = false; @@ -1437,11 +1437,12 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { unsigned R = x; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); - int Off = MinOffset - RC->getSize(); - unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + int Off = MinOffset - Size; + unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment()); assert(isPowerOf2_32(Align)); Off &= -Align; - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off); + int FI = MFI.CreateFixedSpillStackObject(Size, Off); MinOffset = std::min(MinOffset, Off); CSI.push_back(CalleeSavedInfo(R, FI)); SRegs[R] = false; @@ -1677,10 +1678,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1734,10 +1735,10 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? 
Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1777,16 +1778,16 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, if (!MI->getOperand(0).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1815,15 +1816,15 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, if (!MI->getOperand(1).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1932,7 +1933,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) continue; unsigned Num = RC == &Hexagon::IntRegsRegClass ? 
NumberScavengerSlots : 1; - unsigned S = RC->getSize(), A = RC->getAlignment(); + unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC); for (unsigned i = 0; i < Num; i++) { int NewFI = MFI.CreateSpillStackObject(S, A); RS->addScavengingFrameIndex(NewFI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index b5948475e1f7..1829c5da02a6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include @@ -1206,10 +1207,9 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, if (!T) return false; - unsigned BW = T->getBitWidth(); - APInt K0(BW, 0), K1(BW, 0); - computeKnownBits(V, K0, K1, DL); - return K0.countLeadingOnes() >= IterCount; + KnownBits Known(T->getBitWidth()); + computeKnownBits(V, Known, DL); + return Known.Zero.countLeadingOnes() >= IterCount; } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td index c0c29b992238..22fc2474fae6 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td +++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td @@ -122,6 +122,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), } let usesCustomInserter = 1 in { + let Uses = [SR] in { def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), "# Select8 PSEUDO", [(set GR8:$dst, @@ -130,6 +131,7 @@ let usesCustomInserter = 1 in { "# Select16 PSEUDO", [(set GR16:$dst, (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>; + } let Defs = [SR] in { def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), "# Shl8 PSEUDO", diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 2a9d96205eb9..134f7ac3aea3 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -273,9 +273,9 @@ void MipsAsmPrinter::printSavedRegsBitmask() { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); // size of stack area to which FP callee-saved regs are saved. - unsigned CPURegSize = Mips::GPR32RegClass.getSize(); - unsigned FGR32RegSize = Mips::FGR32RegClass.getSize(); - unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); + unsigned CPURegSize = TRI->getRegSizeInBits(Mips::GPR32RegClass) / 8; + unsigned FGR32RegSize = TRI->getRegSizeInBits(Mips::FGR32RegClass) / 8; + unsigned AFGR64RegSize = TRI->getRegSizeInBits(Mips::AFGR64RegClass) / 8; bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp index 7af988c1f64d..cb9f676c237a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp @@ -38,7 +38,7 @@ static bool isF128SoftLibCall(const char *CallSym) { /// This function returns true if Ty is fp128, {f128} or i128 which was /// originally a fp128. 
-static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { +static bool originalTypeIsF128(const Type *Ty, const char *Func) { if (Ty->isFP128Ty()) return true; @@ -46,12 +46,9 @@ static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { Ty->getStructElementType(0)->isFP128Ty()) return true; - const ExternalSymbolSDNode *ES = - dyn_cast_or_null(CallNode); - // If the Ty is i128 and the function being called is a long double emulation // routine, then the original type is f128. - return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol())); + return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); } MipsCCState::SpecialCallingConvType @@ -73,11 +70,11 @@ MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, void MipsCCState::PreAnalyzeCallResultForF128( const SmallVectorImpl &Ins, - const TargetLowering::CallLoweringInfo &CLI) { + const Type *RetTy, const char *Call) { for (unsigned i = 0; i < Ins.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode())); - OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy()); + originalTypeIsF128(RetTy, Call)); + OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); } } @@ -99,10 +96,10 @@ void MipsCCState::PreAnalyzeReturnForF128( void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl &Outs, std::vector &FuncArgs, - const SDNode *CallNode) { + const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode)); + originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func)); OriginalArgWasFloat.push_back( FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.h b/contrib/llvm/lib/Target/Mips/MipsCCState.h index 081c393a09be..77ecc65b2eee 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCCState.h +++ b/contrib/llvm/lib/Target/Mips/MipsCCState.h @@ -31,7 +31,7 @@ class MipsCCState : public CCState { /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, - const TargetLowering::CallLoweringInfo &CLI); + const Type *RetTy, const char * Func); /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. @@ -42,7 +42,7 @@ class MipsCCState : public CCState { void PreAnalyzeCallOperands(const SmallVectorImpl &Outs, std::vector &FuncArgs, - const SDNode *CallNode); + const char *Func); /// Identify lowered values that originated from f128 arguments and record /// this. 
@@ -73,8 +73,8 @@ class MipsCCState : public CCState { AnalyzeCallOperands(const SmallVectorImpl &Outs, CCAssignFn Fn, std::vector &FuncArgs, - const SDNode *CallNode) { - PreAnalyzeCallOperands(Outs, FuncArgs, CallNode); + const char *Func) { + PreAnalyzeCallOperands(Outs, FuncArgs, Func); CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); @@ -99,9 +99,9 @@ class MipsCCState : public CCState { } void AnalyzeCallResult(const SmallVectorImpl &Ins, - CCAssignFn Fn, - const TargetLowering::CallLoweringInfo &CLI) { - PreAnalyzeCallResultForF128(Ins, CLI); + CCAssignFn Fn, const Type *RetTy, + const char *Func) { + PreAnalyzeCallResultForF128(Ins, RetTy, Func); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp index c060cf06099d..a5c7bf7699ea 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1260,8 +1260,10 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0); if (RetVT != MVT::isVoid) { SmallVector RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips); + MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); + + CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, + CLI.Symbol->getName().data()); // Only handle a single return value. if (RVLocs.size() != 1) diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index b2cf03976f81..ef05166503b2 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -119,7 +119,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); + unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); Offset = alignTo(Offset + Size, Size); } diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index 93c5f496ce97..8f39ebd42a5c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2750,7 +2750,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // caller side but removing it breaks the frame size calculation. CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode()); + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(Callee.getNode()); + CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), + ES ? ES->getSymbol() : nullptr); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); @@ -2985,7 +2988,11 @@ SDValue MipsTargetLowering::LowerCallResult( SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI); + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(CLI.Callee.getNode()); + CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI.RetTy, + ES ? 
ES->getSymbol() : nullptr); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp index 5bf4c958c7b9..63034ecab93b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp @@ -40,11 +40,7 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { const TargetRegisterClass *RC = STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : STI.inMicroMipsMode() - ? STI.hasMips64() - ? &Mips::GPRMM16_64RegClass - : &Mips::GPRMM16RegClass - : static_cast(MF.getTarget()) + : static_cast(MF.getTarget()) .getABI() .IsN64() ? &Mips::GPR64RegClass @@ -53,14 +49,15 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { } void MipsFunctionInfo::createEhDataRegsFI() { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 4; ++I) { - const TargetRegisterClass *RC = + const TargetRegisterClass &RC = static_cast(MF.getTarget()).getABI().IsN64() - ? &Mips::GPR64RegClass - : &Mips::GPR32RegClass; + ? Mips::GPR64RegClass + : Mips::GPR32RegClass; - EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), false); } } @@ -69,11 +66,12 @@ void MipsFunctionInfo::createISRRegFI() { // The current implementation only supports Mips32r2+ not Mips64rX. Status // is always 32 bits, ErrorPC is 32 or 64 bits dependent on architecture, // however Mips32r2+ is the supported architecture. - const TargetRegisterClass *RC = &Mips::GPR32RegClass; + const TargetRegisterClass &RC = Mips::GPR32RegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 2; ++I) ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(RC), TRI.getSpillAlignment(RC), false); } bool MipsFunctionInfo::isEhDataRegFI(int FI) const { @@ -93,9 +91,10 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) { } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); if (MoveF64ViaSpillFI == -1) { MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(*RC), TRI.getSpillAlignment(*RC), false); } return MoveF64ViaSpillFI; } diff --git a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp index f33857fe628f..68dcbdfb4211 100644 --- a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -116,9 +116,10 @@ static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) { /// Return type of register Reg. 
static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); - assert(RC->vt_end() - RC->vt_begin() == 1); - return *RC->vt_begin(); + assert(TRI.legalclasstypes_end(*RC) - TRI.legalclasstypes_begin(*RC) == 1); + return *TRI.legalclasstypes_begin(*RC); } /// Do the following transformation: diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index ef8d18c6deb1..e765b4625206 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -260,7 +260,8 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, // copy dst_hi, $vr1 unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); - unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2; + const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); + unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); @@ -858,6 +859,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); @@ -883,10 +885,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target is // mips64, it should be 64-bit, otherwise it should be 32-bt. - const TargetRegisterClass *RC = STI.hasMips64() ? - &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = STI.hasMips64() ? + Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } @@ -897,10 +900,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (isInt<16>(MaxSPOffset)) return; - const TargetRegisterClass *RC = - ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = + ABI.ArePtrs64bit() ? 
Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index 91e712a7a54e..ee074798563d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -207,13 +207,16 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; @@ -280,13 +283,16 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; @@ -567,8 +573,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); - unsigned DstRegSize = getRegClass(Desc, 0, RI, MF)->getSize(); - unsigned SrcRegSize = getRegClass(Desc, 1, RI, MF)->getSize(); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } diff --git a/contrib/llvm/lib/Target/Mips/Relocation.txt b/contrib/llvm/lib/Target/Mips/Relocation.txt new file mode 100644 index 000000000000..f1a6fd8645f6 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/Relocation.txt @@ -0,0 +1,125 @@ +MIPS Relocation Principles + +In LLVM, there are several elements of the llvm::ISD::NodeType enum +that deal with addresses and/or relocations. These are defined in +include/llvm/Target/TargetSelectionDAG.td, namely: + GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool, + ExternalSymbol, BlockAddress +The MIPS backend uses several principles to handle these. + +1. 
Code for lowering addresses references to machine dependent code is +factored into common code for generating different address forms and +is called by the relocation model specific lowering function, using +templated functions. For example: + + // lib/Target/Mips/MipsISelLowering.cpp + SDValue MipsTargetLowering:: + lowerJumpTable(SDValue Op, SelectionDAG &DAG) const + +calls + + template // lib/Target/Mips/MipsISelLowering.h + SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, + SelectionDAG &DAG, bool IsN32OrN64) const + +which calls the overloaded function: + + // lib/Target/Mips/MipsISelLowering.h + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + +2. Generic address nodes are lowered to some combination of target +independent and machine specific SDNodes (for example: +MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model, +ABI, and compilation options. + +The choice of specific instructions that are to be used is delegated +to ISel which in turn relies on TableGen patterns to choose subtarget +specific instructions. For example, in getAddrLocal, the pseudo-code +generated is: + + (add (load (wrapper $gp, %got(sym)), %lo(sym)) + +where "%lo" represents an instance of an SDNode with opcode +"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper", +and "%got" the global table pointer "getGlobalReg(...)". The "add" is +"ISD::ADD", not a target dependent one. + +3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a +template pattern parameterized over the load upper immediate +instruction, add operation, the zero register, and register class. +Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used +to MIPS32 to compute addresses for the static relocation model. + + // lib/Target/Mips/MipsInstrInfo.td + multiclass MipsHiLoRelocs { + def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + ... + } + defm : MipsHiLoRelocs; + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHiLoRelocs, SYM_32; + +The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 +mode, as guarded by the predicate "SYM_32" and also for a submode of +LP64 where symbols are assumed to be 32 bits wide. A similar +multiclass for MIPS64 in LP64 mode is also defined: + + // lib/Target/Mips/Mips64InstrInfo.td + multiclass MipsHighestHigherHiLoRelocs { + ... + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), + (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), + (Daddiu ZERO_64, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + } + +and it is instantiated twice: + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64; + // lib/Target/Mips/MicroMips64r6InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64, + ISA_MICROMIPS64R6; + +These patterns are used during instruction selection to match +MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction +and operands. 
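As a concrete illustration of item 2: before the TableGen patterns above can match, the MipsISD nodes have to be built during lowering. A minimal C++ sketch of how the pseudo-code "(add (load (wrapper $gp, %got(sym))), %lo(sym))" might be composed out of SelectionDAG nodes is shown below. It assumes the getTargetNode/getGlobalReg helpers quoted earlier and the MipsII::MO_GOT / MipsII::MO_ABS_LO relocation flags, and it glosses over chain handling and the N32/N64 cases, so treat it as a sketch rather than the actual getAddrLocal implementation.

    // Sketch only: builds (add (load (wrapper $gp, %got(sym))), %lo(sym)).
    // Flag choices and operand order here are assumptions, not verbatim code.
    SDValue Got  = getTargetNode(N, Ty, DAG, MipsII::MO_GOT);       // %got(sym)
    SDValue Wrap = DAG.getNode(MipsISD::Wrapper, DL, Ty,
                               getGlobalReg(DAG, Ty), Got);         // wrapper($gp, %got(sym))
    SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrap,
                               MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    SDValue Lo   = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);    // %lo(sym)
    return DAG.getNode(ISD::ADD, DL, Ty, Load,
                       DAG.getNode(MipsISD::Lo, DL, Ty, Lo));       // final address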
+ +More details on how multiclasses in TableGen work can be found in the +section "Multiclass definitions and instances" in the document +"TableGen Language Introduction" + +4. Instruction definitions are multiply defined to cover the different +register classes. In some cases, such as LW/LW64, this also accounts +for the difference in the results of instruction execution. On MIPS32, +"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit +value from memory and sign extends the value to 64 bits. + + // lib/Target/Mips/MipsInstrInfo.td + def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM; + // lib/Target/Mips/Mips64InstrInfo.td + def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; + +defines two names "LUi" and "LUi64" with two different register +classes, but with the same encoding---"LUI_FM". These instructions load a +16-bit immediate into bits 31-16 and clear the lower 15 bits. On MIPS64, +the result is sign-extended to 64 bits. diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 3026f0be242d..0f6c2e53e60a 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -38,7 +38,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC->getSize() != SrcRC->getSize()) + if (RegInfo.getRegSizeInBits(*DestRC) != RegInfo.getRegSizeInBits(*SrcRC)) report_fatal_error("Copy one register into another with a different width"); unsigned Op; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 4c9430a2eca0..2a402deccbca 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1898,12 +1898,13 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &GPRC = PPC::GPRCRegClass; + const TargetRegisterClass &G8RC = PPC::G8RCRegClass; + const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; + const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); // Might we have over-aligned allocas? bool HasAlVars = MFI.hasVarSizedObjects() && @@ -1911,9 +1912,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // These kinds of spills might need two registers. 
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f7663d8e5185..4659a2ea8032 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9057,6 +9057,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -9070,7 +9071,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -9153,7 +9154,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); - const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 8e159f47ea2e..790a8902b3d2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -440,8 +440,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opcode)); } -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. 
+void PPCInstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(PPC::NOP); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index f11aed8fa268..b30d09e03ec4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -269,7 +269,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 455d1ee1564a..f120a98e9457 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -3234,6 +3234,7 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -3245,7 +3246,8 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index c8ff9558cc88..fee008b9572a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -104,8 +104,9 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); - // Clear the kill flags for the base and index registers in the first - // instruction. + // Clear the kill flags on the registers in the first instruction. 
+ if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse()) + EarlierMI->getOperand(0).setIsKill(false); EarlierMI->getOperand(1).setIsKill(false); EarlierMI->getOperand(3).setIsKill(false); @@ -1114,10 +1115,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return nullptr; unsigned OpNum = Ops[0]; - assert(Size == - MF.getRegInfo() - .getRegClass(MI.getOperand(OpNum).getReg()) - ->getSize() && + assert(Size * 8 == + TRI->getRegSizeInBits(*MF.getRegInfo() + .getRegClass(MI.getOperand(OpNum).getReg())) && "Invalid size combination"); if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index d9c2dba5bace..4178ec0b28f0 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -45,10 +45,11 @@ using namespace llvm; //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) - if (TRC->hasType(T)) + if (TRI->isTypeLegalForClass(*TRC, T)) return T; DEBUG(errs() << "Unknown type for register number: " << RegNo); llvm_unreachable("Unknown register type"); diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 324da650e74e..c1cfc82b4a81 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3094,6 +3094,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); else if (IDVal.startswith(".att_syntax")) { + getParser().setParsingInlineAsm(false); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "prefix") Parser.Lex(); @@ -3106,6 +3107,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); + getParser().setParsingInlineAsm(true); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "noprefix") Parser.Lex(); diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index fdcc7e1ab7b0..19c93cfff0fe 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -95,7 +95,8 @@ void initializeFixupBWInstPassPass(PassRegistry &); /// encoding when possible in order to reduce code size. 
FunctionPass *createX86EvexToVexInsts(); -InstructionSelector *createX86InstructionSelector(X86Subtarget &, +InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &, X86RegisterBankInfo &); void initializeEvexToVexInstPassPass(PassRegistry &); diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 8fcc8e31d5d4..d2f650cf8f47 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -273,6 +273,16 @@ def FeatureFastSHLDRotate "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; +// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka +// "string operations"). See "REP String Enhancement" in the Intel Software +// Development Manual. This feature essentially means that REP MOVSB will copy +// using the largest available size instead of copying bytes one by one, making +// it at least as fast as REPMOVS{W,D,Q}. +def FeatureERMSB + : SubtargetFeature< + "ermsb", "HasERMSB", "true", + "REP MOVS/STOS are fast">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -498,6 +508,7 @@ def HSWFeatures : ProcessorFeatures BitOffsets; SmallVector SplitRegs; EVT PartVT = TLI.getRegisterType(Context, VT); @@ -64,8 +63,10 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)), PartTy, OrigArg.Flags}; SplitArgs.push_back(Info); - PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i); + SplitRegs.push_back(Info.Reg); } + + PerformArgSplit(SplitRegs); } namespace { @@ -112,10 +113,9 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector SplitArgs; - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - MIRBuilder.buildExtract(Reg, VReg, Offset); - }); + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -183,22 +183,10 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, for (auto &Arg : F.args()) { ArgInfo OrigArg(VRegs[Idx], Arg.getType()); setArgFlags(OrigArg, Idx + 1, DL, F); - LLT Ty = MRI.getType(VRegs[Idx]); - unsigned Dst = VRegs[Idx]; - bool Split = false; splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - if (!Split) { - Split = true; - Dst = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildUndef(Dst); - } - unsigned Tmp = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset); - Dst = Tmp; + [&](ArrayRef Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); }); - if (Dst != VRegs[Idx]) - MIRBuilder.buildCopy(VRegs[Idx], Dst); Idx++; } diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm/lib/Target/X86/X86CallLowering.h index 204e6974c702..8a8afb568298 100644 --- a/contrib/llvm/lib/Target/X86/X86CallLowering.h +++ b/contrib/llvm/lib/Target/X86/X86CallLowering.h @@ -34,14 +34,15 @@ class X86CallLowering : public CallLowering { bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const override; + private: /// A function of this type is used to perform value split action. 
- typedef std::function SplitArgTy; + typedef std::function)> SplitArgTy; void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; }; -} // End of namespace llvm; +} // namespace llvm #endif diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 036f5d2610e4..b8477810b4c9 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -2149,7 +2149,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (!LHSReg || !RHSReg) return false; - unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); + const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); + unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill); updateValueMap(I, ResultReg); diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 8678a13b95d0..a94045cd536d 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1783,6 +1783,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &FrameReg, + int Adjustment) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + FrameReg = TRI->getStackRegister(); + return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment; +} + int X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, @@ -1839,9 +1847,6 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, assert(MF.getInfo()->getTCReturnAddrDelta() >= 0 && "we don't handle this case!"); - // Fill in FrameReg output argument. - FrameReg = TRI->getStackRegister(); - // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. 
Each box below is @@ -1866,12 +1871,8 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, // (C - E) == (C - A) - (B - A) + (B - E) // { Using [1], [2] and [3] above } // == getObjectOffset - LocalAreaOffset + StackSize - // - // Get the Offset from the StackPointer - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); - - return Offset + StackSize; + return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); } bool X86FrameLowering::assignCalleeSavedSpillSlots( @@ -1923,14 +1924,15 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); + SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; // spill into slot - SpillSlotOffset -= RC->getSize(); - int SlotIndex = - MFI.CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + SpillSlotOffset -= Size; + int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); - MFI.ensureMaxAlignment(RC->getAlignment()); + MFI.ensureMaxAlignment(Align); } return true; diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h index 863dc8b22968..7d214cabad53 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h @@ -100,6 +100,8 @@ class X86FrameLowering : public TargetFrameLowering { int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const override; diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index b5f29fb400ef..ada46643a5fe 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5060,8 +5060,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -14424,8 +14424,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // If the input is a buildvector just emit a smaller one. unsigned ElemsPerChunk = ResVT.getVectorNumElements(); if (In.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, - makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); // Everything else is legal. 
return Op; @@ -25944,6 +25944,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); @@ -25960,7 +25961,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DstReg = MI.getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -30207,7 +30209,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); - if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { // If we changed the computation somewhere in the DAG, this change will @@ -33777,7 +33779,7 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); } @@ -35937,7 +35939,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. - if (Res.second->hasType(VT) || VT == MVT::Other) + if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. 
"ax" with MVT::32 should @@ -35975,11 +35977,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; - else if (X86::VR128RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT)) Res.second = &X86::VR128RegClass; - else if (X86::VR256RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT)) Res.second = &X86::VR256RegClass; - else if (X86::VR512RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) Res.second = &X86::VR512RegClass; else { // Type mismatch and not a clobber: Return an error; diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td index bfd21c062aa2..66382014f6e8 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -989,10 +989,12 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Constraints = "$src1 = $dst" - def NAME#8mr : BinOpMR_RMW; - def NAME#16mr : BinOpMR_RMW; - def NAME#32mr : BinOpMR_RMW; - def NAME#64mr : BinOpMR_RMW; + let mayLoad = 1, mayStore = 1 in { + def NAME#8mr : BinOpMR_RMW; + def NAME#16mr : BinOpMR_RMW; + def NAME#32mr : BinOpMR_RMW; + def NAME#64mr : BinOpMR_RMW; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 7b456fd68343..26444dd1f619 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6284,9 +6284,11 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - MRI.getRegClass(DstReg)->getSize(), + TRI.getRegSizeInBits(RC) / 8, false /*HasMemoryOperand*/); BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); } @@ -6557,7 +6559,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool HasAVX512 = STI.hasAVX512(); bool HasVLX = STI.hasVLX(); - switch (RC->getSize()) { + switch (STI.getRegisterInfo()->getSpillSize(*RC)) { default: llvm_unreachable("Unknown spill size"); case 1: @@ -6603,28 +6605,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; case 16: { - assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); - // If stack is realigned we can use aligned stores. - if (isStackAligned) - return load ? - (HasVLX ? X86::VMOVAPSZ128rm : - HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : - HasAVX ? X86::VMOVAPSrm : - X86::MOVAPSrm): - (HasVLX ? X86::VMOVAPSZ128mr : - HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : - HasAVX ? X86::VMOVAPSmr : - X86::MOVAPSmr); - else - return load ? - (HasVLX ? X86::VMOVUPSZ128rm : - HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : - HasAVX ? X86::VMOVUPSrm : - X86::MOVUPSrm): - (HasVLX ? X86::VMOVUPSZ128mr : - HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : - HasAVX ? 
X86::VMOVUPSmr : - X86::MOVUPSmr); + if (X86::VR128XRegClass.hasSubClassEq(RC)) { + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? + (HasVLX ? X86::VMOVAPSZ128rm : + HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : + HasAVX ? X86::VMOVAPSrm : + X86::MOVAPSrm): + (HasVLX ? X86::VMOVAPSZ128mr : + HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : + HasAVX ? X86::VMOVAPSmr : + X86::MOVAPSmr); + else + return load ? + (HasVLX ? X86::VMOVUPSZ128rm : + HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : + HasAVX ? X86::VMOVUPSrm : + X86::MOVUPSrm): + (HasVLX ? X86::VMOVUPSZ128mr : + HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : + HasAVX ? X86::VMOVUPSmr : + X86::MOVUPSmr); + } + if (X86::BNDRRegClass.hasSubClassEq(RC)) { + if (STI.is64Bit()) + return load ? X86::BNDMOVRM64rm : X86::BNDMOVMR64mr; + else + return load ? X86::BNDMOVRM32rm : X86::BNDMOVMR32mr; + } + llvm_unreachable("Unknown 16-byte regclass"); } case 32: assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); @@ -6709,9 +6719,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() && + assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && "Stack slot too small for store"); - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6728,7 +6738,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = std::max(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); @@ -6748,7 +6759,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6763,7 +6774,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = std::max(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); @@ -7222,7 +7234,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, NewOpc = getSETFromCond(NewCC, HasMemoryOperand); else { unsigned DstReg = Instr.getOperand(0).getReg(); - NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + NewOpc = getCMovFromCond(NewCC, TRI->getRegSizeInBits(*DstRC)/8, HasMemoryOperand); } @@ -7750,7 +7763,9 @@ 
MachineInstr *X86InstrInfo::foldMemoryOperandCustom( unsigned DstIdx = (Imm >> 4) & 3; unsigned SrcIdx = (Imm >> 6) & 3; - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 4 <= Align) { int PtrOffset = SrcIdx * 4; unsigned NewImm = (DstIdx << 4) | ZMask; @@ -7772,7 +7787,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( // To fold the load, adjust the pointer to the upper and use (V)MOVLPS. // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. if (OpNum == 2) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 8 <= Align) { unsigned NewOpCode = (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm : @@ -7861,7 +7878,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return nullptr; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, + &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -8302,11 +8322,13 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineFunction &MF) { unsigned Opc = LoadMI.getOpcode(); unsigned UserOpc = UserMI.getOpcode(); - unsigned RegSize = - MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); + unsigned RegSize = TRI.getRegSizeInBits(*RC); if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) && - RegSize > 4) { + RegSize > 32) { // These instructions only load 32 bits, we can't fold them if the // destination register is wider than 32 bits (4 bytes), and its user // instruction isn't scalar (SS). @@ -8357,7 +8379,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, } if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) && - RegSize > 8) { + RegSize > 64) { // These instructions only load 64 bits, we can't fold them if the // destination register is wider than 64 bits (8 bytes), and its user // instruction isn't scalar (SD). @@ -8702,6 +8724,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); unsigned NumDefs = MCID.NumDefs; std::vector AddrOps; @@ -8724,7 +8747,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. 
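
The hunks above replace the old byte-based TargetRegisterClass::getSize() with queries on TargetRegisterInfo, so the partial-register-load checks now compare bit counts (RegSize > 32 / > 64) instead of byte counts (> 4 / > 8). A minimal standalone sketch of that unit change, using made-up register widths rather than the real LLVM classes:

  #include <cassert>

  // Hypothetical widths standing in for TRI.getRegSizeInBits(*RC); the real
  // values come from the TableGen-generated register class descriptions.
  constexpr unsigned FR32Bits = 32;    // a 32-bit scalar FP class
  constexpr unsigned VR128Bits = 128;  // a 128-bit vector class

  // MOVSS only loads 32 bits, so folding needs extra care when the
  // destination register is wider than the bits actually loaded.
  // Old code compared bytes (RegSize > 4); new code compares bits.
  static bool movssLoadIsPartial(unsigned RegSizeInBits) {
    return RegSizeInBits > 32;
  }

  int main() {
    assert(!movssLoadIsPartial(FR32Bits));   // full-width load, safe to fold
    assert(movssLoadIsPartial(VR128Bits));   // wider than the 32 bits loaded
  }
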
SDNode *Load = nullptr; if (FoldedLoad) { - EVT VT = *RC->vt_begin(); + EVT VT = *TRI.legalclasstypes_begin(*RC); std::pair MMOs = MF.extractLoadMemRefs(cast(N)->memoperands_begin(), @@ -8736,7 +8759,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, @@ -8752,7 +8775,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { DstRC = getRegClass(MCID, 0, &RI, MF); - VTs.push_back(*DstRC->vt_begin()); + VTs.push_back(*TRI.legalclasstypes_begin(*DstRC)); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); @@ -8781,7 +8804,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = @@ -9514,7 +9537,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { } /// Return the noop instruction to use for a noop. -void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void X86InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index 2fee48570ce1..38567831b3a4 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -457,7 +457,7 @@ class X86InstrInfo final : public X86GenInstrInfo { int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index e31d2769047b..c3def461afdc 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -897,6 +897,7 @@ def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; +def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index e1bf28cbf612..f22a50200c9a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -4602,17 +4602,17 @@ def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, 
$src}", + "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; let isCodeGenOnly = 1 in def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; } // ExeDomain = SSEPackedInt @@ -4681,7 +4681,7 @@ def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), VEX; def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; @@ -4694,7 +4694,7 @@ def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs), [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; } // ExeDomain = SSEPackedInt @@ -4721,7 +4721,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), @@ -4811,12 +4811,12 @@ let Predicates = [UseSSE2] in { } } -// These are the correct encodings of the instructions so that we know how to -// read correct assembly, even though we continue to emit the wrong ones for -// compatibility with Darwin's buggy assembler. -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of +// "movq" due to MacOS parsing limitation. In order to parse old assembly, we add +// these aliases. +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; // Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX. 
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", @@ -7144,33 +7144,37 @@ let Predicates = [UseSSE41] in { /// SS42I_binop_rm - Simple SSE 4.2 binary operator multiclass SS42I_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit Is2Addr = 1> { def rr : SS428I; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; def rm : SS428I; + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, - loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG; + loadv2i64, i128mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + loadv4i64, i256mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, SSE_INTALU_ITINS_P>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp index fb9315792892..d0f1b7091da9 100644 --- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -39,11 +39,16 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class X86InstructionSelector : public InstructionSelector { public: - X86InstructionSelector(const X86Subtarget &STI, + X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -70,10 +75,17 @@ class X86InstructionSelector : public InstructionSelector { bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + const X86TargetMachine &TM; const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; + bool OptForSize; + bool OptForMinSize; + + PredicateBitset AvailableFeatures; + PredicateBitset computeAvailableFeatures(const MachineFunction *MF, + const X86Subtarget *Subtarget) const; #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" @@ -86,10 +98,12 @@ class X86InstructionSelector : public InstructionSelector { #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL -X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, +X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, + const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false), + OptForMinSize(false), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -181,6 +195,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } 
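
The instruction selector changes below add a beginFunction() hook so that the optsize/minsize flags and the available-feature bitset are recomputed once per function rather than per selected instruction. A rough standalone analogue of that caching pattern, with stub types in place of the GlobalISel classes (FunctionStub and the 64-bit bitset are assumptions for illustration):

  #include <bitset>
  #include <cassert>

  struct FunctionStub {               // stand-in for MachineFunction
    bool OptSize = false;
    bool OptMinSize = false;
  };

  class SelectorSketch {
  public:
    void beginFunction(const FunctionStub &MF) {
      // Cached once per function; select() can then test these cheaply for
      // every instruction. The real hook also calls computeAvailableFeatures.
      OptForSize = MF.OptSize;
      OptForMinSize = MF.OptMinSize;
      AvailableFeatures.reset();
    }
    bool optForSize() const { return OptForSize; }

  private:
    bool OptForSize = false;
    bool OptForMinSize = false;
    std::bitset<64> AvailableFeatures;  // plays the role of PredicateBitset
  };

  int main() {
    SelectorSketch S;
    S.beginFunction(FunctionStub{true, false});
    assert(S.optForSize());
  }
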
+void X86InstructionSelector::beginFunction(const MachineFunction &MF) { + OptForSize = MF.getFunction()->optForSize(); + OptForMinSize = MF.getFunction()->optForMinSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -571,7 +591,8 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I, } InstructionSelector * -llvm::createX86InstructionSelector(X86Subtarget &Subtarget, +llvm::createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { - return new X86InstructionSelector(Subtarget, RBI); + return new X86InstructionSelector(TM, Subtarget, RBI); } diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 9bab9a4cf3ba..1f16f3c9a14d 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -137,25 +137,29 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::FR32RegClassID: case X86::FR64RegClassID: // If AVX-512 isn't supported we should only inflate to these classes. - if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128RegClassID: case X86::VR256RegClassID: // If VLX isn't supported we should only inflate to these classes. - if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128XRegClassID: case X86::VR256XRegClassID: // If VLX isn't support we shouldn't inflate to these classes. - if (Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::FR32XRegClassID: case X86::FR64XRegClassID: // If AVX-512 isn't support we shouldn't inflate to these classes. - if (Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::GR8RegClassID: @@ -168,7 +172,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::VR512RegClassID: // Don't return a super-class that would shrink the spill size. // That can happen with the vector and float classes. - if (Super->getSize() == RC->getSize()) + if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } Super = *I++; @@ -669,32 +673,28 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + + // Determine base register and offset. + int FIOffset; unsigned BasePtr; - - unsigned Opc = MI.getOpcode(); - bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || - Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; - - if (hasBasePointer(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); - else if (needsStackRealignment(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); - else if (AfterFPPop) - BasePtr = StackPtr; - else - BasePtr = (TFI->hasFP(MF) ? 
FramePtr : StackPtr); + if (MI.isReturn()) { + assert((!needsStackRealignment(MF) || + MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && + "Return instruction can only reference SP relative frame objects"); + FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else { + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); + } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - unsigned IgnoredFrameReg; + unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - FI.ChangeToImmediate(Offset); + FI.ChangeToImmediate(FIOffset); return; } @@ -710,15 +710,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); - // Now add the frame object offset to the offset from EBP. - int FIOffset; - if (AfterFPPop) { - // Tail call jmp happens after FP is popped. - const MachineFrameInfo &MFI = MF.getFrameInfo(); - FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); - } else - FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - if (BasePtr == StackPtr) FIOffset += SPAdj; diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 9da8a18965ea..1a72a0ba3a64 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -106,7 +106,6 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Count; ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -163,20 +162,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -195,6 +181,24 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( return Chain; } +namespace { + +// Represents a cover of a buffer of SizeVal bytes with blocks of size +// AVT, as well as how many bytes remain (BytesLeft is always smaller than +// the block size). 
+struct RepMovsRepeats { + RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { + const unsigned UBytes = AVT.getSizeInBits() / 8; + Count = SizeVal / UBytes; + BytesLeft = SizeVal % UBytes; + } + + unsigned Count; + unsigned BytesLeft; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -229,7 +233,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( return SDValue(); MVT AVT; - if (Align & 1) + if (Subtarget.hasERMSB()) + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + AVT = MVT::i8; + else if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; @@ -240,14 +249,18 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( // QWORD aligned AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + RepMovsRepeats Repeats(SizeVal, AVT); + if (Repeats.BytesLeft > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to handle + // BytesLeft. + AVT = MVT::i8; + Repeats = RepMovsRepeats(SizeVal, AVT); + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -262,9 +275,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SmallVector Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = SizeVal - Repeats.BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -275,7 +288,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft, dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 92a68759195c..4154530d04e7 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -303,6 +303,7 @@ void X86Subtarget::initializeEnvironment() { HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index d0d88d326949..fd057f36c890 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -232,6 +232,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. 
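
Taken together, the memcpy hunks above pick a rep-movs block size and then split the byte count into whole blocks plus a tail, skipping the tail entirely when ERMSB makes byte-sized blocks cheap or when minsize forbids the extra tail code. The sketch below mirrors that arithmetic with plain integers; the helper names and flags are stand-ins rather than the SelectionDAG interfaces, and a 64-bit target is assumed for the QWORD case:

  #include <cassert>
  #include <cstdint>

  struct RepeatsSketch {
    RepeatsSketch(uint64_t SizeVal, unsigned BlockBytes)
        : Count(SizeVal / BlockBytes), BytesLeft(SizeVal % BlockBytes) {}
    uint64_t Count;      // number of rep-movs iterations
    uint64_t BytesLeft;  // tail handled by a separate small memcpy
  };

  static unsigned pickBlockBytes(bool HasERMSB, bool OptForMinSize,
                                 unsigned Align, uint64_t SizeVal) {
    // Enhanced REP MOVSB: byte blocks are at least as fast and never leave
    // a tail that needs extra code.
    if (HasERMSB)
      return 1;
    // Otherwise derive the block size from alignment (64-bit target assumed).
    unsigned Bytes = (Align & 1) ? 1 : (Align & 2) ? 2 : (Align & 4) ? 4 : 8;
    // Under minsize, prefer bytes over emitting tail-handling code.
    if (OptForMinSize && SizeVal % Bytes != 0)
      return 1;
    return Bytes;
  }

  int main() {
    RepeatsSketch R(19, pickBlockBytes(false, false, /*Align=*/8, 19));
    assert(R.Count == 2 && R.BytesLeft == 3);        // two QWORDs + 3-byte tail
    assert(pickBlockBytes(true, false, 8, 19) == 1); // ERMSB: plain REP MOVSB
    assert(pickBlockBytes(false, true, 8, 19) == 1); // minsize: avoid the tail
  }
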
bool PadShortFunctions; @@ -472,6 +475,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 03a1958121ab..623cf38aa951 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -286,7 +286,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index a752357400b3..784612038c09 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -575,18 +575,17 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { assert(RS && "requiresRegisterScavenging failed"); MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); XCoreFunctionInfo *XFI = MF.getInfo(); // Reserve slots close to SP or frame pointer for Scavenging spills. // When using SP for small frames, we don't need any scratch registers. // When using SP for large frames, we may need 2 scratch registers. // When using FP, for large or small frames, we may need 1 scratch register. 
+ unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); if (XFI->isLargeFrame(MF) || hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); if (XFI->isLargeFrame(MF) && !hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 45437815fa37..2efcd46cd8d4 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -1605,7 +1605,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -1622,7 +1622,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index e91536ca1e83..75af0e97dfb5 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -10,6 +10,7 @@ #include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" #include "llvm/IR/Function.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -35,13 +36,15 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) { if (LRSpillSlotSet) { return LRSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); if (! MF.getFunction()->isVarArg()) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. 
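
As in the X86 changes earlier, the XCore hunks stop asking the register class for its size and instead query TargetRegisterInfo, hoisting the size/alignment pair into locals that every created slot reuses. A small stand-alone illustration of that shape, with stub types and illustrative 4-byte values rather than the real TableGen data:

  #include <cstdio>

  struct SpillInfoStub {               // stand-in for the TRI spill queries
    unsigned SpillSize;                // bytes
    unsigned SpillAlign;               // bytes
  };

  static int createStackObjectStub(unsigned Size, unsigned Align, bool Fixed) {
    static int NextIndex = 0;
    std::printf("frame index %d: %u bytes, align %u, fixed=%d\n", NextIndex,
                Size, Align, Fixed);
    return NextIndex++;
  }

  int main() {
    SpillInfoStub GR{4, 4};            // illustrative word-sized GR class
    unsigned Size = GR.SpillSize;      // queried once ...
    unsigned Align = GR.SpillAlign;
    int EH0 = createStackObjectStub(Size, Align, true);  // ... reused for both
    int EH1 = createStackObjectStub(Size, Align, true);  //     EH spill slots
    (void)EH0;
    (void)EH1;
  }
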
- LRSpillSlot = MFI.CreateFixedObject(RC->getSize(), 0, true); + LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true); } else { - LRSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + LRSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); } LRSpillSlotSet = true; return LRSpillSlot; @@ -51,9 +54,11 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) { if (FPSpillSlotSet) { return FPSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - FPSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + FPSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); FPSpillSlotSet = true; return FPSpillSlot; } @@ -62,10 +67,13 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) { if (EHSpillSlotSet) { return EHSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EHSpillSlot[0] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); - EHSpillSlot[1] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + EHSpillSlot[0] = MFI.CreateStackObject(Size, Align, true); + EHSpillSlot[1] = MFI.CreateStackObject(Size, Align, true); EHSpillSlotSet = true; return EHSpillSlot; } diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index a2f6e5639d9d..78e71c18fe29 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -17,7 +17,9 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -27,10 +29,21 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; -#define DEBUG_TYPE "partialinlining" +#define DEBUG_TYPE "partial-inlining" STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +// Command line option to disable partial-inlining. The default is false: +static cl::opt + DisablePartialInlining("disable-partial-inlining", cl::init(false), + cl::Hidden, cl::desc("Disable partial ininling")); + +// Command line option to set the maximum number of partial inlining allowed +// for the module. The default value of -1 means no limit. +static cl::opt MaxNumPartialInlining( + "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of partial inlining. 
The default is unlimited")); + namespace { struct PartialInlinerImpl { PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} @@ -39,6 +52,12 @@ struct PartialInlinerImpl { private: InlineFunctionInfo IFI; + int NumPartialInlining = 0; + + bool IsLimitReached() { + return (MaxNumPartialInlining != -1 && + NumPartialInlining >= MaxNumPartialInlining); + } }; struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid @@ -66,6 +85,9 @@ struct PartialInlinerLegacyPass : public ModulePass { Function *PartialInlinerImpl::unswitchFunction(Function *F) { // First, verify that this function is an unswitching candidate... + if (F->hasAddressTaken()) + return nullptr; + BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) @@ -149,11 +171,29 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { // Inline the top-level if test into all callers. std::vector Users(DuplicateFunction->user_begin(), DuplicateFunction->user_end()); - for (User *User : Users) + + for (User *User : Users) { + CallSite CS; if (CallInst *CI = dyn_cast(User)) - InlineFunction(CI, IFI); + CS = CallSite(CI); else if (InvokeInst *II = dyn_cast(User)) - InlineFunction(II, IFI); + CS = CallSite(II); + else + llvm_unreachable("All uses must be calls"); + + if (IsLimitReached()) + continue; + NumPartialInlining++; + + OptimizationRemarkEmitter ORE(CS.getCaller()); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + BasicBlock *Block = CS.getParent(); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) + << ore::NV("Callee", F) << " partially inlined into " + << ore::NV("Caller", CS.getCaller())); + + InlineFunction(CS, IFI); + } // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. @@ -166,6 +206,9 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { } bool PartialInlinerImpl::run(Module &M) { + if (DisablePartialInlining) + return false; + std::vector Worklist; Worklist.reserve(M.size()); for (Function &F : M) diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index f11b58d1adc4..0d5910ebbfcc 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -282,6 +282,12 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { } if (!PGOInstrUse.empty()) MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { @@ -414,11 +420,14 @@ void PassManagerBuilder::populateModulePassManager( else if (!GlobalExtensions->empty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); - if (PrepareForThinLTO) - // Rename anon globals to be able to export them in the summary. - MPM.add(createNameAnonGlobalPass()); - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + // Rename anon globals to be able to export them in the summary. 
+ // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. + if (PrepareForThinLTO) + MPM.add(createNameAnonGlobalPass()); return; } @@ -468,16 +477,10 @@ void PassManagerBuilder::populateModulePassManager( // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd // profile annotation in backend more difficult. - if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) { - /// PGO instrumentation is added during the compile phase for ThinLTO, do - /// not run it a second time + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); - // Indirect call promotion that promotes intra-module targets only. - // For ThinLTO this is done earlier due to interactions with globalopt - // for imported functions. - MPM.add( - createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); - } if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the @@ -677,6 +680,11 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopSinkPass()); // Get rid of LCSSA nodes. MPM.add(createInstructionSimplifierPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + MPM.add(createCFGSimplificationPass()); + addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index e30a4bafb9b0..030461004f56 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -794,6 +795,11 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { if (Opnd->isConstant()) continue; + // The constant check above is really for a few special constant + // coefficients. + if (isa(Opnd->getSymVal())) + continue; + const FAddendCoef &CE = Opnd->getCoef(); if (CE.isMinusOne() || CE.isMinusTwo()) NegOpndNum++; @@ -894,24 +900,22 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, 0, &CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); + KnownBits RHSKnown(BitWidth); + computeKnownBits(RHS, RHSKnown, 0, &CxtI); // Addition of two 2's complement numbers having opposite signs will never // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + if ((LHSKnown.One[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]) || + (LHSKnown.Zero[BitWidth - 1] && RHSKnown.One[BitWidth - 1])) return true; // Check if carry bit of addition will not cause overflow. 
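
The InstCombine hunk here switches from carrying two separate APInts to the KnownBits helper, whose Zero and One members are indexed the same way the old pair was. A tiny self-contained model of the "opposite known signs never overflow" test (a hand-rolled 8-bit struct, not the real llvm/Support/KnownBits.h):

  #include <cassert>
  #include <cstdint>

  struct KnownBitsSketch {             // simplified stand-in for llvm::KnownBits
    uint8_t Zero = 0;                  // bits known to be 0
    uint8_t One = 0;                   // bits known to be 1
  };

  // Addition of two 2's-complement numbers with opposite known signs can
  // never overflow; this mirrors the sign-bit test in the hunk above.
  static bool oppositeKnownSigns(const KnownBitsSketch &L,
                                 const KnownBitsSketch &R) {
    const uint8_t SignBit = 0x80;
    return ((L.One & SignBit) && (R.Zero & SignBit)) ||
           ((L.Zero & SignBit) && (R.One & SignBit));
  }

  int main() {
    KnownBitsSketch Neg;  Neg.One  = 0x80;  // sign bit known set (negative)
    KnownBitsSketch Pos;  Pos.Zero = 0x80;  // sign bit known clear
    assert(oppositeKnownSigns(Neg, Pos));
    assert(!oppositeKnownSigns(Neg, Neg));
  }
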
- if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) + if (checkRippleForAdd(LHSKnown.Zero, RHSKnown.Zero)) return true; - if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) + if (checkRippleForAdd(RHSKnown.Zero, LHSKnown.Zero)) return true; return false; @@ -931,18 +935,16 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); + KnownBits LHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, 0, &CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); + KnownBits RHSKnown(BitWidth); + computeKnownBits(RHS, RHSKnown, 0, &CxtI); // Subtraction of two 2's complement numbers having identical signs will // never overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1])) + if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) || + (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1])) return true; // TODO: implement logic similar to checkRippleForAdd @@ -1113,10 +1115,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // a sub and fuse this add with it. if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { IntegerType *IT = cast(I.getType()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I); - if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + KnownBits LHSKnown(IT->getBitWidth()); + computeKnownBits(XorLHS, LHSKnown, 0, &I); + if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } @@ -1385,39 +1386,58 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); + Type *FPType = LHSConv->getType(); + + // TODO: This check is overly conservative. In many cases known bits + // analysis can tell us that the result of the addition has less significant + // bits than the integer type can hold. + auto IsValidPromotion = [](Type *FTy, Type *ITy) { + Type *FScalarTy = FTy->getScalarType(); + Type *IScalarTy = ITy->getScalarType(); + + // Do we have enough bits in the significand to represent the result of + // the integer addition? + unsigned MaxRepresentableBits = + APFloat::semanticsPrecision(FScalarTy->getFltSemantics()); + return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits; + }; // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better // instcombined. - if (ConstantFP *CFP = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { - // Insert the new integer add. 
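
The new IsValidPromotion guard introduced above only allows turning (fadd (sitofp x), C) into (sitofp (add x, C')) when every value of the integer type is exactly representable in the float type's significand. A quick numeric check of that bound for IEEE double, using std::numeric_limits as a stand-in for APFloat::semanticsPrecision:

  #include <cassert>
  #include <limits>

  static bool fitsInSignificand(unsigned IntBits, unsigned SignificandBits) {
    // Mirrors: IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits.
    return IntBits <= SignificandBits;
  }

  int main() {
    const unsigned DoublePrec = std::numeric_limits<double>::digits;  // 53
    assert(fitsInSignificand(32, DoublePrec));   // i32 add in double: safe
    assert(!fitsInSignificand(64, DoublePrec));  // i64 add in double: may round
    // 2^53 + 1 is the first integer a double cannot represent exactly.
    assert(static_cast<double>((1ULL << 53) + 1) ==
           static_cast<double>(1ULL << 53));
  }
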
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + if (ConstantFP *CFP = dyn_cast(RHS)) + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } - } // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast(RHS)) { Value *RHSIntVal = RHSConv->getOperand(0); - - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSIntVal->getType() == RHSIntVal->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + // It's enough to check LHS types only because we require int types to + // be the same for this transform. + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + // Only do this if x/y have the same type, if at least one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSIntVal->getType() == RHSIntVal->getType() && + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + RHSIntVal, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } } } @@ -1617,10 +1637,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. 
if (Op0C->isMask()) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I); - if ((*Op0C | RHSKnownZero).isAllOnesValue()) + KnownBits RHSKnown(BitWidth); + computeKnownBits(Op1, RHSKnown, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateXor(Op1, Op0); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3a98e8937bda..a97b5a9ec0bb 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -906,15 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredL) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 - case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 - case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return LHS; - } case ICmpInst::ICMP_NE: switch (PredR) { default: @@ -930,43 +921,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 return Builder->CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change - case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 - case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. break; } break; - case ICmpInst::ICMP_ULT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); - case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 - case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return LHS; - } - break; - case ICmpInst::ICMP_SLT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 - case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return LHS; - } - break; case ICmpInst::ICMP_UGT: switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -980,9 +943,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -1234,6 +1194,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { return nullptr; } +static Instruction *foldAndToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::And); + Value *Op0 = I.getOperand(0); + Value *Op1 = 
I.getOperand(1); + Value *A, *B; + + // Operand complexity canonicalization guarantees that the 'or' is Op0. + // (A | B) & ~(A & B) --> A ^ B + // (A | B) & ~(B & A) --> A ^ B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateXor(A, B); + + // (A | ~B) & (~A | B) --> ~(A ^ B) + // (A | ~B) & (B | ~A) --> ~(A ^ B) + // (~B | A) & (~A | B) --> ~(A ^ B) + // (~B | A) & (B | ~A) --> ~(A ^ B) + if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + return nullptr; +} + +static Instruction *foldOrToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::Or); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // Operand complexity canonicalization guarantees that the 'and' is Op0. + // (A & B) | ~(A | B) --> ~(A ^ B) + // (A & B) | ~(B | A) --> ~(A ^ B) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + // (A & ~B) | (~A & B) --> A ^ B + // (A & ~B) | (B & ~A) --> A ^ B + // (~B & A) | (~A & B) --> A ^ B + // (~B & A) | (B & ~A) --> A ^ B + if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateXor(A, B); + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -1247,15 +1257,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); - // (A|B)&(A|C) -> A|(B&C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) - return replaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; + // Do this before using distributive laws to catch simple and/or/not patterns. 
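
The foldAndToXor and foldOrToXor helpers added above rewrite and/or/not combinations into a single xor (or its complement) before the distributive-law folds get a chance to obscure them. Since these are pure Boolean identities, they can be checked exhaustively; a brief bitwise verification, independent of the InstCombine classes themselves:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Check the rewrites on all pairs of byte patterns.
    for (unsigned a = 0; a < 256; ++a) {
      for (unsigned b = 0; b < 256; ++b) {
        uint8_t A = a, B = b;
        // (A | B) & ~(A & B) --> A ^ B            (foldAndToXor)
        assert(uint8_t((A | B) & ~(A & B)) == uint8_t(A ^ B));
        // (A | ~B) & (~A | B) --> ~(A ^ B)
        assert(uint8_t((A | ~B) & (~A | B)) == uint8_t(~(A ^ B)));
        // (A & B) | ~(A | B) --> ~(A ^ B)         (foldOrToXor)
        assert(uint8_t((A & B) | ~(A | B)) == uint8_t(~(A ^ B)));
        // (A & ~B) | (~A & B) --> A ^ B
        assert(uint8_t((A & ~B) | (~A & B)) == uint8_t(A ^ B));
      }
    }
  }
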
+ if (Instruction *Xor = foldAndToXor(I, *Builder)) + return Xor; + + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); @@ -1366,19 +1380,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return DeMorgan; { - Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - + Value *A = nullptr, *B = nullptr, *C = nullptr; // A&(A^B) => A & ~B { Value *tmpOp0 = Op0; @@ -1405,11 +1407,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op0); // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C @@ -1425,13 +1425,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B)))) + // (A | B) & (B ^ (~A)) -> (A & B) + // (B | A) & ((~A) ^ B) -> (A & B) + // (B | A) & (B ^ (~A)) -> (A & B) + if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && + match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); // ((~A) ^ B) & (A | B) -> (A & B) // ((~A) ^ B) & (B | A) -> (A & B) - if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) && + // (B ^ (~A)) & (A | B) -> (A & B) + // (B ^ (~A)) & (B | A) -> (A & B) + if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); } @@ -2037,15 +2042,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); - // (A&B)|(A&C) -> A&(B|C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) - return replaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; + // Do this before using distributive laws to catch simple and/or/not patterns. 
+ if (Instruction *Xor = foldOrToXor(I, *Builder)) + return Xor; + + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); @@ -2105,19 +2114,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { match(Op0, m_c_And(m_Specific(A), m_Value(B)))) return BinaryOperator::CreateOr(Op1, B); - // (A & ~B) | (A ^ B) -> (A ^ B) - // (~B & A) | (A ^ B) -> (A ^ B) - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // Commute the 'or' operands. - // (A ^ B) | (A & ~B) -> (A ^ B) - // (A ^ B) | (~B & A) -> (A ^ B) - if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op0, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - // (A & C)|(B & D) Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && @@ -2182,23 +2178,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, V); } - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - // ((A|B)&1)|(B&-2) -> (A&1) | B if (match(A, m_Or(m_Value(V1), m_Specific(B))) || match(A, m_Or(m_Specific(B), m_Value(V1)))) { @@ -2374,6 +2353,58 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return Changed ? &I : nullptr; } +/// A ^ B can be specified using other logic ops in a variety of patterns. We +/// can fold these early and efficiently by morphing an existing instruction. +static Instruction *foldXorToXor(BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Xor); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // There are 4 commuted variants for each of the basic patterns. 
+ + // (A & B) ^ (A | B) -> A ^ B + // (A & B) ^ (B | A) -> A ^ B + // (A | B) ^ (A & B) -> A ^ B + // (A | B) ^ (B & A) -> A ^ B + if ((match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) || + (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A | ~B) ^ (~A | B) -> A ^ B + // (~B | A) ^ (~A | B) -> A ^ B + // (~A | B) ^ (A | ~B) -> A ^ B + // (B | ~A) ^ (A | ~B) -> A ^ B + if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A & ~B) ^ (~A & B) -> A ^ B + // (~B & A) ^ (~A & B) -> A ^ B + // (~A & B) ^ (A & ~B) -> A ^ B + // (B & ~A) ^ (A & ~B) -> A ^ B + if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2387,6 +2418,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) return replaceInstUsesWith(I, V); + if (Instruction *NewXor = foldXorToXor(I)) + return NewXor; + // (A&B)^(A&C) -> A&(B^C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); @@ -2399,44 +2433,39 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); - // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (IsFreeToInvert(Op0I->getOperand(0), - Op0I->getOperand(0)->hasOneUse()) && - IsFreeToInvert(Op0I->getOperand(1), - Op0I->getOperand(1)->hasOneUse())) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } - - } else if (Op0I->getOpcode() == Instruction::AShr) { - // ~(~X >>s Y) --> (X >>s Y) - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) - return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1)); + // Is this a 'not' (~) fed by a binary operator? 
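
The foldXorToXor helper added just above does the same for xor of and/or combinations, morphing the existing xor in place by resetting its operands to A and B. The corresponding identities, again checked exhaustively over bytes as a sanity sketch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned a = 0; a < 256; ++a) {
      for (unsigned b = 0; b < 256; ++b) {
        uint8_t A = a, B = b;
        // (A & B) ^ (A | B) --> A ^ B
        assert(uint8_t((A & B) ^ (A | B)) == uint8_t(A ^ B));
        // (A | ~B) ^ (~A | B) --> A ^ B
        assert(uint8_t((A | ~B) ^ (~A | B)) == uint8_t(A ^ B));
        // (A & ~B) ^ (~A & B) --> A ^ B
        assert(uint8_t((A & ~B) ^ (~A & B)) == uint8_t(A ^ B));
      }
    }
  }
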
+ BinaryOperator *NotOp; + if (match(&I, m_Not(m_BinOp(NotOp)))) { + if (NotOp->getOpcode() == Instruction::And || + NotOp->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(NotOp->getOperand(1))) + NotOp->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { + Value *NotY = Builder->CreateNot( + NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); + if (NotOp->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); } + + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (IsFreeToInvert(NotOp->getOperand(0), + NotOp->getOperand(0)->hasOneUse()) && + IsFreeToInvert(NotOp->getOperand(1), + NotOp->getOperand(1)->hasOneUse())) { + Value *NotX = Builder->CreateNot(NotOp->getOperand(0), "notlhs"); + Value *NotY = Builder->CreateNot(NotOp->getOperand(1), "notrhs"); + if (NotOp->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } + } else if (NotOp->getOpcode() == Instruction::AShr) { + // ~(~X >>s Y) --> (X >>s Y) + if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) + return BinaryOperator::CreateAShr(Op0NotVal, NotOp->getOperand(1)); } } @@ -2574,40 +2603,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { { Value *A, *B, *C, *D; - // (A & B)^(A | B) -> A ^ B - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | B)^(A & B) -> A ^ B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | ~B) ^ (~A | B) -> A ^ B - // (~B | A) ^ (~A | B) -> A ^ B - if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A | B) ^ (A | ~B) -> A ^ B - if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } - // (A & ~B) ^ (~A & B) -> A ^ B - // (~B & A) ^ (~A & B) -> A ^ B - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A & B) ^ (A & ~B) -> A ^ B - if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } // (A ^ C)^(A | B) -> ((~A) & B) ^ C if (match(Op0, m_Xor(m_Value(D), m_Value(C))) && match(Op1, m_Or(m_Value(A), m_Value(B)))) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e7aa1a457371..313ab13b9e2b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" @@ -1378,14 +1379,13 @@ static 
Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { return nullptr; unsigned BitWidth = IT->getBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II); + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros() - : KnownOne.countLeadingZeros(); + unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros() + : Known.One.countLeadingZeros(); APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) : APInt::getHighBitsSet(BitWidth, NumMaskBits); @@ -1393,7 +1393,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // zero, this value is constant. // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. - if ((Mask & KnownZero) == Mask) { + if (Mask.isSubsetOf(Known.Zero)) { auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); return IC.replaceInstUsesWith(II, C); } @@ -1401,7 +1401,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { + if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { if (!match(II.getArgOperand(1), m_One())) { II.setOperand(1, IC.Builder->getTrue()); return &II; @@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *CSrc0 = dyn_cast(Src0)) { if (auto *CSrc1 = dyn_cast(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) { + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + } + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. @@ -3599,9 +3617,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. 
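As context for the foldCttzCtlz change earlier in this hunk: once the known bits of the operand are available, cttz can sometimes be folded to a constant. A standalone sketch with made-up known-bit values (not taken from the patch; __builtin_ctz is a GCC/Clang builtin):

#include <cstdint>
#include <cstdio>

int main() {
  // Suppose value analysis proved these facts about some 32-bit value V.
  uint32_t KnownOne  = 0x00000008; // bit 3 is definitely set
  uint32_t KnownZero = 0x00000007; // bits 0..2 are definitely clear

  // The lowest bit known to be one bounds the cttz result from above.
  int TZ = __builtin_ctz(KnownOne);
  uint32_t Mask = (1u << TZ) - 1;  // bits below the lowest known-one bit

  // If every bit in that mask is known zero, cttz is a constant regardless
  // of the bits that remain unknown, which is what the fold exploits.
  if ((Mask & KnownZero) == Mask)
    std::printf("cttz(V) == %d for every possible V\n", TZ);
  return 0;
}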
- APInt KnownZero(1, 0), KnownOne(1, 0); - computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II); - if (KnownOne.isAllOnesValue()) + KnownBits Known(1); + computeKnownBits(IIOperand, Known, 0, II); + if (Known.One.isAllOnesValue()) return eraseInstFromFunction(*II); // Update the cache of affected values for this assumption (we might be diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 9127ddca5915..312d9baae43a 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,9 +14,10 @@ #include "InstCombineInternal.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -676,11 +677,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: - uint32_t BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI); + KnownBits Known(Op1C->getType()->getBitWidth()); + computeKnownBits(ICI->getOperand(0), Known, 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoTransform) return ICI; @@ -726,13 +726,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI); - computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI); + KnownBits KnownLHS(BitWidth); + KnownBits KnownRHS(BitWidth); + computeKnownBits(LHS, KnownLHS, 0, &CI); + computeKnownBits(RHS, KnownRHS, 0, &CI); - if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { - APInt KnownBits = KnownZeroLHS | KnownOneLHS; + if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) { + APInt KnownBits = KnownLHS.Zero | KnownLHS.One; APInt UnknownBit = ~KnownBits; if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; @@ -740,7 +740,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *Result = Builder->CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. 
- if (KnownOneLHS.uge(UnknownBit)) + if (KnownLHS.One.uge(UnknownBit)) Result = Builder->CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); @@ -1049,10 +1049,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { if (ICI->hasOneUse() && ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI); + KnownBits Known(BitWidth); + computeKnownBits(Op0, Known, 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { Value *In = ICI->getOperand(0); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 003029ae39d5..d846a631b96f 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -175,19 +176,18 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { /// Given a signed integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when all unknown bits are zeros, EXCEPT for the sign // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; + Min = Known.One; + Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown Min.setBit(Min.getBitWidth()-1); @@ -198,19 +198,18 @@ static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero, /// Given an unsigned integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when the unknown bits are all zeros. 
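A worked example of what the two min/max helpers being converted in this hunk compute, using made-up known bits for an 8-bit value (illustration only, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical known bits for an 8-bit value: xx010100 (x = unknown).
  uint8_t KnownOne  = 0x14;
  uint8_t KnownZero = 0x2B;
  uint8_t Unknown   = (uint8_t)~(KnownOne | KnownZero); // 0xC0 here

  // Unsigned range: minimum when all unknown bits are 0, maximum when all are 1.
  uint8_t UMin = KnownOne;
  uint8_t UMax = KnownOne | Unknown;
  std::printf("unsigned range: [%u, %u]\n", (unsigned)UMin, (unsigned)UMax);

  // Signed range: same idea, except an unknown sign bit is set for the minimum
  // and cleared for the maximum (most negative / most positive choice).
  uint8_t SMin = KnownOne, SMax = (uint8_t)(KnownOne | Unknown);
  if (Unknown & 0x80) { SMin |= 0x80; SMax &= 0x7F; }
  std::printf("signed range: [%d, %d]\n", (int)(int8_t)SMin, (int)(int8_t)SMax);
  return 0;
}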
- Min = KnownOne; + Min = Known.One; // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; + Max = Known.One|UnknownBits; } /// This is called when we see this pattern: @@ -1479,14 +1478,14 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // of the high bits truncated out of x are known. unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), SrcBits = X->getType()->getScalarSizeInBits(); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp); + KnownBits Known(SrcBits); + computeKnownBits(X, Known, 0, &Cmp); // If all the high bits are known, we can do this xform. - if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) { + if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { // Pull in the high bits from known-ones set. APInt NewRHS = C->zext(SrcBits); - NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); + NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS)); } } @@ -4001,16 +4000,16 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit); } - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); + KnownBits Op0Known(BitWidth); + KnownBits Op1Known(BitWidth); if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth, IsSignBit), - Op0KnownZero, Op0KnownOne, 0)) + Op0Known, 0)) return &I; if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) + Op1Known, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be @@ -4019,15 +4018,11 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); if (I.isSigned()) { - computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } else { - computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } // If Min and Max are known to be the same, then SimplifyDemandedBits @@ -4054,8 +4049,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // If all bits are known zero except for one, then we know at most one bit // is set. If the comparison is against zero, then this is a check to see if // *that* bit is set. - APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0) { + APInt Op0KnownZeroInverted = ~Op0Known.Zero; + if (~Op1Known.Zero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; const APInt *LHSC; @@ -4193,8 +4188,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Turn a signed comparison into an unsigned one if both operands are known to // have the same sign. 
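The signed-to-unsigned predicate change mentioned just above, implemented in the lines that follow, rests on a two's-complement fact that can be checked exhaustively for 8-bit values. This check is illustrative and not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a < 128; ++a) {
    for (int b = -128; b < 128; ++b) {
      int8_t SA = (int8_t)a, SB = (int8_t)b;
      uint8_t UA = (uint8_t)SA, UB = (uint8_t)SB;
      // When the sign bits are equal, signed and unsigned '<' agree,
      // which is what lets the predicate be rewritten safely.
      if ((SA < 0) == (SB < 0))
        assert((SA < SB) == (UA < UB));
    }
  }
  return 0;
}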
if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) + ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) || + (Op0Known.One.isNegative() && Op1Known.One.isNegative()))) return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); return nullptr; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 71000063ab3c..776686d3d117 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -489,10 +489,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner return nullptr; // Don't do anything with FI } - void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + void computeKnownBits(Value *V, KnownBits &Known, unsigned Depth, Instruction *CxtI) const { - return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, - &DT); + return llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); } bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0, @@ -536,24 +535,23 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - Instruction *CxtI); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, + unsigned Depth, Instruction *CxtI); bool SimplifyDemandedBits(Instruction *I, unsigned Op, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0); + const APInt &DemandedMask, KnownBits &Known, + unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. Value *SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, unsigned Depth, Instruction *CxtI); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne); + Value *simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known); /// \brief Tries to simplify operands to an integer instruction based on its /// demanded bits. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5d6d899da4b5..76829c5e457b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -1476,11 +1477,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. 
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { - APInt KnownOne(1, 0), KnownZero(1, 0); - computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI); - if (KnownOne == 1) + KnownBits Known(1); + computeKnownBits(CondVal, Known, 0, &SI); + if (Known.One == 1) return replaceInstUsesWith(SI, TrueVal); - if (KnownZero == 1) + if (Known.Zero == 1) return replaceInstUsesWith(SI, FalseVal); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 2ba052b7e02d..8d0ed8532779 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -26,7 +27,7 @@ using namespace llvm::PatternMatch; /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + const APInt &Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -37,13 +38,11 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(C->getBitWidth()); if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= *C; - I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); + I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); return true; } @@ -54,10 +53,10 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, /// the instruction has any properties that allow us to simplify its operands. bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + KnownBits Known(BitWidth); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 0, &Inst); if (!V) return false; if (V == &Inst) return true; @@ -70,11 +69,11 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { /// change and false otherwise. bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, unsigned Depth) { Use &U = I->getOperandUse(OpNo); - Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, - KnownOne, Depth, I); + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, + Depth, I); if (!NewVal) return false; U = NewVal; return true; @@ -88,15 +87,16 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// with a constant or one of its operands. In such cases, this function does /// the replacement and returns true. In all other cases, it returns false after /// analyzing the expression and setting KnownOne and known to be one in the -/// expression. KnownZero contains all the bits that are known to be zero in the -/// expression. 
These are provided to potentially allow the caller (which might -/// recursively be SimplifyDemandedBits itself) to simplify the expression. -/// KnownOne and KnownZero always follow the invariant that: -/// KnownOne & KnownZero == 0. -/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and -/// KnownZero may only be accurate for those bits set in DemandedMask. Note also -/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the -/// same. +/// expression. Known.Zero contains all the bits that are known to be zero in +/// the expression. These are provided to potentially allow the caller (which +/// might recursively be SimplifyDemandedBits itself) to simplify the +/// expression. +/// Known.One and Known.Zero always follow the invariant that: +/// Known.One & Known.Zero == 0. +/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and +/// Known.Zero may only be accurate for those bits set in DemandedMask. Note +/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all +/// be the same. /// /// This returns null if it did not change anything and it permits no /// simplification. This returns V itself if it did some simplification of V's @@ -104,8 +104,7 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// some other non-null value if it found out that V is equal to another value /// in the context where the specified bits are demanded, but not for all users. Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth, + KnownBits &Known, unsigned Depth, Instruction *CxtI) { assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); @@ -113,18 +112,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Type *VTy = V->getType(); assert( (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); + Known.getBitWidth() == BitWidth && + "Value *V, DemandedMask and Known must have same BitWidth"); if (isa(V)) { - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; } - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.Zero.clearAllBits(); + Known.One.clearAllBits(); if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); @@ -133,20 +130,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast(V); if (!I) { - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then // we can't do any simplifications of the operands, because DemandedMask // only reflects the bits demanded by *one* of the users. 
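The restriction described in this comment can be motivated with a small, hypothetical example: a value with several users may have all of its bits demanded overall even though each single user ignores some of them, so rewriting it against one user's mask would break the others. The values below are invented for the illustration:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x12345678;
  uint32_t x = a & 0x00FF00FF;      // the multi-use value

  uint32_t user1 = x & 0x000000FF;  // demands only the low byte
  uint32_t user2 = x >> 16;         // demands only the high half

  // If 'x' were simplified using user1's demanded mask alone, it might be
  // replaced with (a & 0xFF). That is fine for user1 but wrong for user2.
  uint32_t x_bad = a & 0x000000FF;
  assert((x_bad & 0x000000FF) == user1);  // user1 still sees the same bits
  assert((x_bad >> 16) != user2);         // user2 does not
  return 0;
}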
- if (Depth != 0 && !I->hasOneUse()) { - return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne, - Depth, CxtI); - } + if (Depth != 0 && !I->hasOneUse()) + return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the @@ -157,22 +151,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, + Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnownZero | LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnownOne & LHSKnownOne; + APInt IKnownOne = RHSKnown.One & LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -181,33 +174,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)) return I; - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Or: { // If either the LHS or the RHS are One, the result is One. 
-    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
-                             Depth + 1) ||
-        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero,
-                             LHSKnownOne, Depth + 1))
+    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
+                             Depth + 1))
       return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+    assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
     // Output known-0 bits are only known if clear in both the LHS & RHS.
-    APInt IKnownZero = RHSKnownZero & LHSKnownZero;
-    // Output known-1 are known to be set if set in either the LHS | RHS.
-    APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    APInt IKnownOne = RHSKnown.One | LHSKnown.One;
     // If the client is only demanding bits that we know, return the known
     // constant.
@@ -216,34 +208,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'or'.
-    if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
+    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
       return I->getOperand(0);
-    if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
+    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
       return I->getOperand(1);
     // If the RHS is a constant, see if we can simplify it.
     if (ShrinkDemandedConstant(I, 1, DemandedMask))
       return I;
-    KnownZero = std::move(IKnownZero);
-    KnownOne = std::move(IKnownOne);
+    Known.Zero = std::move(IKnownZero);
+    Known.One = std::move(IKnownOne);
     break;
   }
   case Instruction::Xor: {
-    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
-                             Depth + 1) ||
-        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne,
-                             Depth + 1))
+    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
      return I;
-    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
-    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+    assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+    assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
-    APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
-                       (RHSKnownOne & LHSKnownOne);
+    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
+                       (RHSKnown.One & LHSKnown.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
-    APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
-                      (RHSKnownOne & LHSKnownZero);
+    APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
+                      (RHSKnown.One & LHSKnown.Zero);
     // If the client is only demanding bits that we know, return the known
     // constant.
@@ -252,15 +242,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'xor'.
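Why returning the other operand is safe here can be seen with a small standalone check (illustrative values, not part of the patch): if every demanded bit of the RHS is known zero, the xor cannot change any demanded bit of the LHS.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t DemandedMask = 0x0000FFFFu; // only the low 16 bits are used downstream
  uint32_t RHSKnownZero = 0x0000FFFFu; // RHS is known zero in all of those bits

  // Any RHS consistent with that knowledge has the form (junk << 16).
  for (uint32_t junk = 0; junk < 16; ++junk) {
    uint32_t lhs = 0xDEADBEEFu;
    uint32_t rhs = junk << 16;
    assert((rhs & RHSKnownZero) == 0);  // consistent with the known-zero facts
    // In the demanded bits, lhs ^ rhs is just lhs.
    assert(((lhs ^ rhs) & DemandedMask) == (lhs & DemandedMask));
  }
  return 0;
}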
- if (DemandedMask.isSubsetOf(RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -271,10 +261,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && - RHSKnownOne.isSubsetOf(LHSKnownOne)) { + if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) && + RHSKnown.One.isSubsetOf(LHSKnown.One)) { Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); + ~RHSKnown.One & DemandedMask); Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); return InsertNewInstWith(And, *I); } @@ -292,10 +282,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && isa(I->getOperand(1)) && isa(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); ConstantInt *XorRHS = cast(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); @@ -309,9 +299,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero = std::move(IKnownZero); + Known.Zero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = std::move(IKnownOne); + Known.One = std::move(IKnownOne); break; } case Instruction::Select: @@ -321,13 +311,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); + assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || @@ -335,21 +323,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. 
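The rule stated in this comment, that a select's known bits are the bits on which both simplified arms agree, can be illustrated with two made-up constants (not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Two possible results of a select (hypothetical constants).
  uint8_t TrueVal  = 0x32;
  uint8_t FalseVal = 0x12;

  // A bit is known for the select only if both arms agree on it.
  uint8_t KnownOne  = TrueVal & FalseVal;                 // 0x12
  uint8_t KnownZero = (uint8_t)(~TrueVal & ~FalseVal);    // 0xCD

  // Whichever arm is chosen at run time, the known bits hold.
  const uint8_t arms[2] = {TrueVal, FalseVal};
  for (uint8_t v : arms) {
    assert((v & KnownOne) == KnownOne);   // known-one bits are set
    assert((v & KnownZero) == 0);         // known-zero bits are clear
  }
  return 0;
}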
- KnownOne = RHSKnownOne & LHSKnownOne; - KnownZero = RHSKnownZero & LHSKnownZero; + Known.One = RHSKnown.One & LHSKnown.One; + Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask = DemandedMask.zext(truncBf); - KnownZero = KnownZero.zext(truncBf); - KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.zext(truncBf); + Known.One = Known.One.zext(truncBf); + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.trunc(BitWidth); + Known.One = Known.One.trunc(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -369,27 +356,25 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. return nullptr; - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask = DemandedMask.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; DemandedMask = DemandedMask.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // The top bits are known to be zero. - KnownZero.setBitsFrom(SrcBitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); break; } case Instruction::SExt: { @@ -406,27 +391,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits.setBit(SrcBitWidth-1); InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne, - Depth + 1)) + Known.Zero = Known.Zero.trunc(SrcBitWidth); + Known.One = Known.One.trunc(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1)) return I; InputDemandedBits = InputDemandedBits.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known.Zero = Known.Zero.zext(BitWidth); + Known.One = Known.One.zext(BitWidth); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
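The sign-extension reasoning in this hunk, a known-zero sign bit means sext equals zext and a known-one sign bit means the new top bits are all one, can be checked directly. The following snippet is an illustration only (two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int main() {
  // Sign bit known zero: sign extension and zero extension agree, which is
  // why the sext is rewritten into a zext below.
  for (uint8_t v = 0; v < 0x80; ++v) {
    int32_t sext = (int32_t)(int8_t)v;
    uint32_t zext = (uint32_t)v;
    assert((uint32_t)sext == zext);
  }

  // Sign bit known one: the extended top bits are all known one.
  for (uint16_t w = 0x80; w <= 0xFF; ++w) {
    int32_t sext = (int32_t)(int8_t)(uint8_t)w; // two's-complement narrowing
    assert(((uint32_t)sext & 0xFFFFFF00u) == 0xFFFFFF00u);
  }
  return 0;
}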
// If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, *I); - } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set - KnownOne |= NewBits; + } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set + Known.One |= NewBits; } break; } @@ -440,11 +424,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // significant bit and all those below it. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || - SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne, - Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || ShrinkDemandedConstant(I, 1, DemandedFromOps) || - SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne, - Depth + 1)) { + SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { // Disable the nsw and nuw flags here: We can no longer guarantee that // we won't wrap after simplification. Removing the nsw/nuw flags is // legal here because the top bit is not demanded. @@ -456,30 +438,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we are known to be adding/subtracting zeros to every bit below // the highest demanded bit, we just return the other side. - if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps) + if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); // We can't do this with the LHS for subtraction. if (I->getOpcode() == Instruction::Add && - (DemandedFromOps & LHSKnownZero) == DemandedFromOps) + DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); } // Otherwise just hand the add/sub off to computeKnownBits to fill in // the known zeros and ones. - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - { - Value *VarX; ConstantInt *C1; - if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { - Instruction *Shr = cast(I->getOperand(0)); - Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, - KnownZero, KnownOne); - if (R) - return R; - } + case Instruction::Shl: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + const APInt *ShrAmt; + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { + Instruction *Shr = cast(I->getOperand(0)); + if (Value *R = simplifyShrShlDemandedBits( + Shr, *ShrAmt, I, *SA, DemandedMask, Known)) + return R; } uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -492,17 +472,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn.setHighBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known.Zero <<= ShiftAmt; + Known.One <<= ShiftAmt; // low bits known zero. 
if (ShiftAmt) - KnownZero.setLowBits(ShiftAmt); + Known.Zero.setLowBits(ShiftAmt); } break; + } case Instruction::LShr: { const APInt *SA; if (match(I->getOperand(1), m_APInt(SA))) { @@ -516,14 +496,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero.lshrInPlace(ShiftAmt); - KnownOne.lshrInPlace(ShiftAmt); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) - KnownZero.setHighBits(ShiftAmt); // high bits known zero. + Known.Zero.setHighBits(ShiftAmt); // high bits known zero. } break; } @@ -560,15 +539,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero.lshrInPlace(ShiftAmt); - KnownOne.lshrInPlace(ShiftAmt); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); // Handle the sign bits. APInt SignMask(APInt::getSignMask(BitWidth)); @@ -577,14 +555,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { + if (BitWidth <= ShiftAmt || Known.Zero[BitWidth-ShiftAmt-1] || + !DemandedMask.intersects(HighBits)) { BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), I->getOperand(1)); LShr->setIsExact(cast(I)->isExact()); return InsertNewInstWith(LShr, *I); - } else if ((KnownOne & SignMask) != 0) { // New bits are known one. - KnownOne |= HighBits; + } else if (Known.One.intersects(SignMask)) { // New bits are known one. + Known.One |= HighBits; } } break; @@ -602,25 +580,24 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); - if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. - KnownZero = LHSKnownZero & LowBits; - KnownOne = LHSKnownOne & LowBits; + Known.Zero = LHSKnown.Zero & LowBits; + Known.One = LHSKnown.One & LowBits; // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) + Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. 
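Both statements in this comment about srem by a power of two can be confirmed with ordinary C++ arithmetic (C++ '%' truncates toward zero, matching LLVM's srem). This is a standalone illustration, not part of the change:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t RA = 8; // power-of-two divisor; the low-bit mask is RA - 1 = 7
  for (int32_t x = -100; x <= 100; ++x) {
    int32_t r = x % RA;

    // The low bits of the LHS are unchanged by the srem.
    assert((r & 7) == (x & 7));

    if (x >= 0 || (x & 7) == 0)
      assert((r & ~7) == 0);   // upper bits are all zero
    else
      assert((r | 7) == -1);   // upper bits are all one
  }
  return 0;
}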
- if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) - KnownOne |= ~LowBits; + if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) + Known.One |= ~LowBits; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); break; } } @@ -628,22 +605,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. if (DemandedMask.isSignBitSet()) { - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isSignBitSet()) - KnownZero.setSignBit(); + if (LHSKnown.Zero.isSignBitSet()) + Known.Zero.setSignBit(); } break; case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + KnownBits Known2(BitWidth); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) || - SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1)) + if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) || + SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1)) return I; - unsigned Leaders = KnownZero2.countLeadingOnes(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; + unsigned Leaders = Known2.Zero.countLeadingOnes(); + Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } case Instruction::Call: @@ -707,56 +683,54 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. - KnownZero.setBitsFrom(ArgWidth); + Known.Zero.setBitsFrom(ArgWidth); return nullptr; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); return nullptr; } } - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) - return Constant::getIntegerValue(VTy, KnownOne); + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(VTy, Known.One); return nullptr; } -/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne +/// Helper routine of SimplifyDemandedUseBits. It computes Known /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, unsigned Depth, Instruction *CxtI) { unsigned BitWidth = DemandedMask.getBitWidth(); Type *ITy = I->getType(); - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can + // context, we can at least compute the known bits, and we can // do simplifications that apply to *just* the one user if we know that // this instruction has a simpler value in that context. 
switch (I->getOpcode()) { case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnownZero | LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnownOne & LHSKnownOne; + APInt IKnownOne = RHSKnown.One & LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -766,13 +740,13 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Or: { @@ -780,15 +754,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 bits are only known if clear in both the LHS & RHS. - APInt IKnownZero = RHSKnownZero & LHSKnownZero; + APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - APInt IKnownOne = RHSKnownOne | LHSKnownOne; + APInt IKnownOne = RHSKnown.One | LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. @@ -798,30 +771,29 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) return I->getOperand(1); - KnownZero = std::move(IKnownZero); - KnownOne = std::move(IKnownOne); + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Xor: { // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. 
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); + APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | + (RHSKnown.One & LHSKnown.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); + APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | + (RHSKnown.One & LHSKnown.Zero); // If the client is only demanding bits that we know, return the known // constant. @@ -830,25 +802,25 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, // If all of the demanded bits are known zero on one side, return the // other. - if (DemandedMask.isSubsetOf(RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - if (DemandedMask.isSubsetOf(LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero = std::move(IKnownZero); + Known.Zero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = std::move(IKnownOne); + Known.One = std::move(IKnownOne); break; } default: - // Compute the KnownZero/KnownOne bits to simplify things downstream. - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + // Compute the Known bits to simplify things downstream. + computeKnownBits(I, Known, Depth, CxtI); // If this user is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) - return Constant::getIntegerValue(ITy, KnownOne); + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(ITy, Known.One); break; } @@ -874,29 +846,26 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, - Instruction *Shl, - const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne) { - - const APInt &ShlOp1 = cast(Shl->getOperand(1))->getValue(); - const APInt &ShrOp1 = cast(Shr->getOperand(1))->getValue(); +Value * +InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, + Instruction *Shl, const APInt &ShlOp1, + const APInt &DemandedMask, + KnownBits &Known) { if (!ShlOp1 || !ShrOp1) - return nullptr; // Noop. + return nullptr; // No-op. Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - unsigned BitWidth = Ty->getIntegerBitWidth(); + unsigned BitWidth = Ty->getScalarSizeInBits(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) return nullptr; // Undef. 
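For context on the pattern simplifyShrShlDemandedBits targets: a logical shift right followed by a shift left by the same amount only clears the low bits, so when those bits are not demanded the pair is a no-op. A small standalone check with made-up shift amounts and mask (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ShrAmt = 3, ShlAmt = 3;     // equal shift amounts
  const uint32_t DemandedMask = 0xFFFFFFF8u; // caller ignores the low 3 bits

  for (uint32_t x = 0; x < (1u << 16); ++x) {
    uint32_t shifted = (x >> ShrAmt) << ShlAmt; // clears only bits 0..2
    // In the demanded bits, the shr/shl pair does not change x at all.
    assert((shifted & DemandedMask) == (x & DemandedMask));
  }
  return 0;
}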
unsigned ShlAmt = ShlOp1.getZExtValue(); unsigned ShrAmt = ShrOp1.getZExtValue(); - KnownOne.clearAllBits(); - KnownZero.setLowBits(ShlAmt - 1); - KnownZero &= DemandedMask; + Known.One.clearAllBits(); + Known.Zero.setLowBits(ShlAmt - 1); + Known.Zero &= DemandedMask; APInt BitMask1(APInt::getAllOnesValue(BitWidth)); APInt BitMask2(APInt::getAllOnesValue(BitWidth)); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 81f2d9fa179f..4729c79ca4c3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -60,6 +60,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" @@ -641,14 +642,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. - if ((L == A && R == B) || - (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) - return Op0; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. C = Builder->CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; @@ -666,14 +659,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. - if ((L == B && R == C) || - (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) - return Op1; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. A = Builder->CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; @@ -2196,11 +2181,10 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { // There might be assume intrinsics dominating this return that completely // determine the value. If so, constant fold it. - unsigned BitWidth = VTy->getPrimitiveSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI); - if ((KnownZero|KnownOne).isAllOnesValue()) - RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne)); + KnownBits Known(VTy->getPrimitiveSizeInBits()); + computeKnownBits(ResultOp, Known, 0, &RI); + if ((Known.Zero|Known.One).isAllOnesValue()) + RI.setOperand(0, Constant::getIntegerValue(VTy, Known.One)); return nullptr; } @@ -2279,10 +2263,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { } unsigned BitWidth = cast(Cond->getType())->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); - unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); - unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); + KnownBits Known(BitWidth); + computeKnownBits(Cond, Known, 0, &SI); + unsigned LeadingKnownZeros = Known.Zero.countLeadingOnes(); + unsigned LeadingKnownOnes = Known.One.countLeadingOnes(); // Compute the number of leading bits we can ignore. 
// TODO: A better way to determine this would use ComputeNumSignBits(). @@ -2879,11 +2863,10 @@ bool InstCombiner::run() { Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { unsigned BitWidth = Ty->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I); - if ((KnownZero | KnownOne).isAllOnesValue()) { - Constant *C = ConstantInt::get(Ty, KnownOne); + KnownBits Known(BitWidth); + computeKnownBits(I, Known, /*Depth*/0, I); + if ((Known.Zero | Known.One).isAllOnesValue()) { + Constant *C = ConstantInt::get(Ty, Known.One); DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << " from: " << *I << '\n'); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 036dd8d39a08..b866958e3c4b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -265,11 +265,10 @@ static cl::opt cl::Hidden, cl::init(false)); static cl::opt - ClUseMachOGlobalsSection("asan-globals-live-support", - cl::desc("Use linker features to support dead " - "code stripping of globals " - "(Mach-O only)"), - cl::Hidden, cl::init(true)); + ClUseGlobalsGC("asan-globals-live-support", + cl::desc("Use linker features to support dead " + "code stripping of globals"), + cl::Hidden, cl::init(true)); // Debug flags. static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -594,13 +593,15 @@ struct AddressSanitizer : public FunctionPass { }; class AddressSanitizerModule : public ModulePass { - public: +public: explicit AddressSanitizerModule(bool CompileKernel = false, - bool Recover = false) + bool Recover = false, + bool UseGlobalsGC = true) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), - Recover(Recover || ClRecover) {} + Recover(Recover || ClRecover), + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC) {} bool runOnModule(Module &M) override; - static char ID; // Pass identification, replacement for typeid + static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: @@ -635,6 +636,7 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; bool CompileKernel; bool Recover; + bool UseGlobalsGC; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -913,9 +915,10 @@ INITIALIZE_PASS( "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, - bool Recover) { + bool Recover, + bool UseGlobalsGC) { assert(!CompileKernel || Recover); - return new AddressSanitizerModule(CompileKernel, Recover); + return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -1537,9 +1540,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // binary in order to allow the linker to properly dead strip. This is only // supported on recent versions of ld64. 
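The AddressSanitizer changes above thread a new UseGlobalsGC flag from createAddressSanitizerModulePass into the module pass so that linker-assisted dead stripping of instrumented globals can be disabled. A hedged sketch of how a driver might construct the pass with the new third argument; the header path and the legacy pass-manager usage are assumptions about the surrounding tree, only the three-argument signature comes from the hunk above:

```cpp
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Instrumentation.h"

// Registers the ASan module pass, optionally opting out of the
// linker-supported global dead-stripping path controlled by UseGlobalsGC.
void addAsanModulePass(llvm::legacy::PassManagerBase &PM, bool UseGlobalsGC) {
  PM.add(llvm::createAddressSanitizerModulePass(/*CompileKernel=*/false,
                                                /*Recover=*/false,
                                                UseGlobalsGC));
}
```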
bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { - if (!ClUseMachOGlobalsSection) - return false; - if (!TargetTriple.isOSBinFormatMachO()) return false; @@ -1911,9 +1911,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (TargetTriple.isOSBinFormatCOFF()) { + if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); - } else if (ShouldUseMachOGlobalsSection()) { + } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); } else { InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 61d627673c90..d7eb857cff7e 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -943,14 +943,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { BasicBlock *BB = MI->getParent(); DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); DEBUG(dbgs() << *BB << "\n"); + auto OrigBBFreq = BFI.getBlockFreq(BB); BasicBlock *DefaultBB = SplitBlock(BB, MI); BasicBlock::iterator It(*MI); ++It; assert(It != DefaultBB->end()); BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); - DefaultBB->setName("MemOP.Default"); MergeBB->setName("MemOP.Merge"); + BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); + DefaultBB->setName("MemOP.Default"); auto &Ctx = Func.getContext(); IRBuilder<> IRB(BB); diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp index a5afc8ad977c..c1bbc4e96b16 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp @@ -351,8 +351,10 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { - // Check for possible releases. - if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + // Check for possible releases. Treat clang.arc.use as a releasing instruction + // to prevent sinking a retain past it. + if (!CanAlterRefCount(Inst, Ptr, PA, Class) && + Class != ARCInstKind::IntrinsicUser) return false; DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index ee6333e88716..f62e111460ca 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ using namespace consthoist; STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. 
class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ class ConstantHoistingLegacyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -82,6 +90,7 @@ class ConstantHoistingLegacyPass : public FunctionPass { char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) { DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis().getTTI(Fn), - getAnalysis().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis().getTTI(Fn), + getAnalysis().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, return IDom->getBlock()->getTerminator(); } +/// \brief Given \p BBs as input, find another set of BBs which collectively +/// dominates \p BBs and have the minimal sum of frequencies. Return the BB +/// set found in \p BBs. +void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { + assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); + // Nodes on the current path to the root. + SmallPtrSet Path; + // Candidates includes any block 'BB' in set 'BBs' that is not strictly + // dominated by any other blocks in set 'BBs', and all nodes in the path + // in the dominator tree from Entry to 'BB'. + SmallPtrSet Candidates; + for (auto BB : BBs) { + Path.clear(); + // Walk up the dominator tree until Entry or another BB in BBs + // is reached. Insert the nodes on the way to the Path. + BasicBlock *Node = BB; + // The "Path" is a candidate path to be added into Candidates set. + bool isCandidate = false; + do { + Path.insert(Node); + if (Node == Entry || Candidates.count(Node)) { + isCandidate = true; + break; + } + assert(DT.getNode(Node)->getIDom() && + "Entry doens't dominate current Node"); + Node = DT.getNode(Node)->getIDom()->getBlock(); + } while (!BBs.count(Node)); + + // If isCandidate is false, Node is another Block in BBs dominating + // current 'BB'. Drop the nodes on the Path. + if (!isCandidate) + continue; + + // Add nodes on the Path into Candidates. + Candidates.insert(Path.begin(), Path.end()); + } + + // Sort the nodes in Candidates in top-down order and save the nodes + // in Orders. + unsigned Idx = 0; + SmallVector Orders; + Orders.push_back(Entry); + while (Idx != Orders.size()) { + BasicBlock *Node = Orders[Idx++]; + for (auto ChildDomNode : DT.getNode(Node)->getChildren()) { + if (Candidates.count(ChildDomNode->getBlock())) + Orders.push_back(ChildDomNode->getBlock()); + } + } + + // Visit Orders in bottom-up order. 
+ typedef std::pair, BlockFrequency> + InsertPtsCostPair; + // InsertPtsMap is a map from a BB to the best insertion points for the + // subtree of BB (subtree not including the BB itself). + DenseMap InsertPtsMap; + InsertPtsMap.reserve(Orders.size() + 1); + for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { + BasicBlock *Node = *RIt; + bool NodeInBBs = BBs.count(Node); + SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; + + // Return the optimal insert points in BBs. + if (Node == Entry) { + BBs.clear(); + if (InsertPtsFreq > BFI.getBlockFreq(Node)) + BBs.insert(Entry); + else + BBs.insert(InsertPts.begin(), InsertPts.end()); + break; + } + + BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); + // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child + // will update its parent's ParentInsertPts and ParentPtsFreq. + SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; + // Choose to insert in Node or in subtree of Node. + if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + ParentInsertPts.insert(Node); + ParentPtsFreq += BFI.getBlockFreq(Node); + } else { + ParentInsertPts.insert(InsertPts.begin(), InsertPts.end()); + ParentPtsFreq += InsertPtsFreq; + } + } +} + /// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoistingPass::findConstantInsertionPoint( +SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. SmallPtrSet BBs; + SmallPtrSet InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); - if (BBs.count(Entry)) - return &Entry->front(); + if (BBs.count(Entry)) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } + + if (BFI) { + findBestInsertionSet(*DT, *BFI, Entry, BBs); + for (auto BB : BBs) { + BasicBlock::iterator InsertPt = BB->begin(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + ; + InsertPts.insert(&*InsertPt); + } + return InsertPts; + } while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; BB1 = *BBs.begin(); BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); - if (BB == Entry) - return &Entry->front(); + if (BB == Entry) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + InsertPts.insert(findMatInsertPt(&FirstInst)); + return InsertPts; } @@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConstants() { bool MadeChange = false; for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. 
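The bottom-up walk above decides, for every dominator-tree node, whether to keep the insertion points already chosen inside its subtree or to hoist the materialization into the node itself, whichever has the smaller total block frequency. A standalone sketch of that cost comparison follows; the Node type and the recursion are inventions of this sketch, while the real pass works on BasicBlocks, BlockFrequencyInfo, and an explicit bottom-up order rather than recursion:

```cpp
#include <cstdint>
#include <vector>

struct Node {
  uint64_t Freq;                // execution frequency of this block
  std::vector<Node *> Children; // dominator-tree children
  bool MustMaterialize;         // block uses the hoisted constant
};

// Cheapest total frequency at which the constant can be materialized so that
// every MustMaterialize block is dominated by one of the chosen points.
uint64_t bestInsertionCost(const Node &N) {
  uint64_t SubtreeCost = 0;
  for (const Node *C : N.Children)
    SubtreeCost += bestInsertionCost(*C);
  // Nothing below needs the constant and this block doesn't either.
  if (SubtreeCost == 0 && !N.MustMaterialize)
    return 0;
  // Materialize here when this block uses the constant itself, or when doing
  // so is cheaper than keeping the insertion points chosen in the subtree.
  if (N.MustMaterialize || SubtreeCost > N.Freq)
    return N.Freq;
  return SubtreeCost;
}

int main() {
  Node Leaf{/*Freq=*/5, {}, /*MustMaterialize=*/true};
  Node Root{/*Freq=*/1, {&Leaf}, /*MustMaterialize=*/false};
  // Hoisting into the cold root (frequency 1) beats materializing in the hot
  // leaf (frequency 5), so the best cost is 1.
  return bestInsertionCost(Root) == 1 ? 0 : 1;
}
```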
- Instruction *IP = findConstantInsertionPoint(ConstInfo); - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n' << *Base << '\n'); + SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + assert(!IPSet.empty() && "IPSet is empty"); + + unsigned UsesNum = 0; + unsigned ReBasesNum = 0; + for (Instruction *IP : IPSet) { + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + << ") to BB " << IP->getParent()->getName() << '\n' + << *Base << '\n'); + + // Emit materialization code for all rebased constants. + unsigned Uses = 0; + for (auto const &RCI : ConstInfo.RebasedConstants) { + for (auto const &U : RCI.Uses) { + Uses++; + BasicBlock *OrigMatInsertBB = + findMatInsertPt(U.Inst, U.OpndIdx)->getParent(); + // If Base constant is to be inserted in multiple places, + // generate rebase for U using the Base dominating U. + if (IPSet.size() == 1 || + DT->dominates(Base->getParent(), OrigMatInsertBB)) { + emitBaseConstants(Base, RCI.Offset, U); + ReBasesNum++; + } + } + } + UsesNum = Uses; + + // Use the same debug location as the last user of the constant. + assert(!Base->use_empty() && "The use list is empty!?"); + assert(isa(Base->user_back()) && + "All uses should be instructions."); + Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); + } + (void)UsesNum; + (void)ReBasesNum; + // Expect all uses are rebased after rebase is done. + assert(UsesNum == ReBasesNum && "Not all uses are rebased"); + NumConstantsHoisted++; - // Emit materialization code for all rebased constants. - for (auto const &RCI : ConstInfo.RebasedConstants) { - NumConstantsRebased++; - for (auto const &U : RCI.Uses) - emitBaseConstants(Base, RCI.Offset, U); - } + // Base constant is also included in ConstInfo.RebasedConstants, so + // deduct 1 from ConstInfo.RebasedConstants.size(). + NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; - // Use the same debug location as the last user of the constant. - assert(!Base->use_empty() && "The use list is empty!?"); - assert(isa(Base->user_back()) && - "All uses should be instructions."); - Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); - - // Correct for base constant, which we counted above too. - NumConstantsRebased--; MadeChange = true; } return MadeChange; @@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCastInst() const { /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? 
&AM.getResult(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index c843c61ea94e..b5a4cc2f3953 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -26,6 +26,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -95,7 +96,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { return true; } -static bool processPHI(PHINode *P, LazyValueInfo *LVI) { +static bool processPHI(PHINode *P, LazyValueInfo *LVI, + const SimplifyQuery &SQ) { bool Changed = false; BasicBlock *BB = P->getParent(); @@ -149,9 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI) { Changed = true; } - // FIXME: Provide TLI, DT, AT to SimplifyInstruction. - const DataLayout &DL = BB->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(P, DL)) { + if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -488,9 +488,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { ConstantInt::getFalse(C->getContext()); } -static bool runImpl(Function &F, LazyValueInfo *LVI) { +static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { bool FnChanged = false; - // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). Eagerly simplifying shallow blocks means there is strictly less @@ -505,7 +504,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) { BBChanged |= processSelect(cast(II), LVI); break; case Instruction::PHI: - BBChanged |= processPHI(cast(II), LVI); + BBChanged |= processPHI(cast(II), LVI, SQ); break; case Instruction::ICmp: case Instruction::FCmp: @@ -566,14 +565,25 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis().getLVI(); - return runImpl(F, LVI); + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *TLIWP = getAnalysisIfAvailable(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *ACWP = getAnalysisIfAvailable(); + auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; + const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); + return runImpl(F, LVI, SQ); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult(F); - bool Changed = runImpl(F, LVI); + auto *DT = AM.getCachedResult(F); + auto *TLI = AM.getCachedResult(F); + auto *AC = AM.getCachedResult(F); + const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); + bool Changed = runImpl(F, LVI, SQ); if (!Changed) return PreservedAnalyses::all(); diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 7019287954a1..48eda09c463e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -537,9 +538,9 @@ bool GuardWideningImpl::parseRangeChecks( } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(OpLHS, KnownZero, KnownOne, DL); - if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) { + KnownBits Known(BitWidth); + computeKnownBits(OpLHS, Known, DL); + if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); Check.setOffset(ConstantInt::get(Ctx, NewOffset)); diff --git a/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5d8701431a2c..9e2563879da2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -152,15 +152,15 @@ class InferAddressSpaces : public FunctionPass { Function *F) const; void appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector> *PostorderStack, - DenseSet *Visited) const; + Value *V, std::vector> &PostorderStack, + DenseSet &Visited) const; bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, Value *NewV) const; void collectRewritableIntrinsicOperands( IntrinsicInst *II, - std::vector> *PostorderStack, - DenseSet *Visited) const; + std::vector> &PostorderStack, + DenseSet &Visited) const; std::vector collectFlatAddressExpressions(Function &F) const; @@ -204,7 +204,6 @@ static bool isAddressExpression(const Value &V) { // // Precondition: V is an address expression. static SmallVector getPointerOperands(const Value &V) { - assert(isAddressExpression(V)); const Operator &Op = cast(V); switch (Op.getOpcode()) { case Instruction::PHI: { @@ -254,8 +253,8 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, // TODO: Move logic to TTI? void InferAddressSpaces::collectRewritableIntrinsicOperands( - IntrinsicInst *II, std::vector> *PostorderStack, - DenseSet *Visited) const { + IntrinsicInst *II, std::vector> &PostorderStack, + DenseSet &Visited) const { switch (II->getIntrinsicID()) { case Intrinsic::objectsize: case Intrinsic::amdgcn_atomic_inc: @@ -272,13 +271,13 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands( // If V is an unvisited flat address expression, appends V to PostorderStack // and marks it as visited. 
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector> *PostorderStack, - DenseSet *Visited) const { + Value *V, std::vector> &PostorderStack, + DenseSet &Visited) const { assert(V->getType()->isPointerTy()); if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited->insert(V).second) - PostorderStack->push_back(std::make_pair(V, false)); + if (Visited.insert(V).second) + PostorderStack.push_back(std::make_pair(V, false)); } } @@ -293,14 +292,18 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { DenseSet Visited; auto PushPtrOperand = [&](Value *Ptr) { - appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack, - &Visited); + appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, + Visited); }; - // We only explore address expressions that are reachable from loads and - // stores for now because we aim at generating faster loads and stores. + // Look at operations that may be interesting accelerate by moving to a known + // address space. We aim at generating after loads and stores, but pure + // addressing calculations may also be faster. for (Instruction &I : instructions(F)) { - if (auto *LI = dyn_cast(&I)) + if (auto *GEP = dyn_cast(&I)) { + if (!GEP->getType()->isVectorTy()) + PushPtrOperand(GEP->getPointerOperand()); + } else if (auto *LI = dyn_cast(&I)) PushPtrOperand(LI->getPointerOperand()); else if (auto *SI = dyn_cast(&I)) PushPtrOperand(SI->getPointerOperand()); @@ -316,7 +319,7 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { if (auto *MTI = dyn_cast(MI)) PushPtrOperand(MTI->getRawSource()); } else if (auto *II = dyn_cast(&I)) - collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited); + collectRewritableIntrinsicOperands(II, PostorderStack, Visited); else if (ICmpInst *Cmp = dyn_cast(&I)) { // FIXME: Handle vectors of pointers if (Cmp->getOperand(0)->getType()->isPointerTy()) { @@ -338,8 +341,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { - appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack, - &Visited); + appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, + Visited); } } return Postorder; diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 08eb95a1a3d3..a0da81605a80 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1289,6 +1289,36 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredToDestList.size() == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + bool SeenFirstBranchToOnlyDest = false; + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. + else + SuccBB->removePredecessor(BB, true); // This is unreachable successor. 
+ } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + // FIXME: in case this instruction is defined in the current BB and it + // resolves to a single value from all predecessors, we can do RAUW. + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 946d85d7360f..5042fc18d7c4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -345,6 +345,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, if (!SI->isSimple()) return false; + // Don't convert stores of non-integral pointer types to memsets (which stores + // integers). + if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) + return false; + // Avoid merging nontemporal stores. if (SI->getMetadata(LLVMContext::MD_nontemporal)) return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index e5689368de80..8ce96cf1b7a6 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -58,13 +58,14 @@ class LoopRotate { AssumptionCache *AC; DominatorTree *DT; ScalarEvolution *SE; + const SimplifyQuery &SQ; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) { - } + DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), + SQ(SQ) {} bool processLoop(Loop *L); private: @@ -311,8 +312,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -342,8 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C, DL); + Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -671,7 +669,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? 
DefaultRotationThreshold : 0; - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE); + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,7 +714,11 @@ class LoopRotateLegacyPass : public LoopPass { auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE); + auto *TLIWP = getAnalysisIfAvailable(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } }; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index a99c9999c619..8fa806a7e8bc 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This pass transforms loops that contain branches on loop-invariant conditions -// to have multiple loops. For example, it turns the left into the right code: +// to multiple loops. For example, it turns the left into the right code: // // for (...) if (lic) // A for (...) diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 659353e912fe..49ce0262c97b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -352,20 +352,10 @@ Value *StructurizeCFG::invert(Value *Condition) { if (Instruction *Inst = dyn_cast(Condition)) { // Third: Check all the users for an invert BasicBlock *Parent = Inst->getParent(); - for (User *U : Condition->users()) { - if (Instruction *I = dyn_cast(U)) { + for (User *U : Condition->users()) + if (Instruction *I = dyn_cast(U)) if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; - } - } - - // Avoid creating a new instruction in the common case of a compare. 
- if (CmpInst *Cmp = dyn_cast(Inst)) { - if (Cmp->hasOneUse()) { - Cmp->setPredicate(Cmp->getInversePredicate()); - return Cmp; - } - } // Last option: Create a new instruction return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 1cfe3bd53648..7ffdad597a9b 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V, unsigned HiBits = LongLen - ShortLen; const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - APInt Zeros(LongLen, 0), Ones(LongLen, 0); + KnownBits Known(LongLen); - computeKnownBits(V, Zeros, Ones, DL); + computeKnownBits(V, Known, DL); - if (Zeros.countLeadingOnes() >= HiBits) + if (Known.Zero.countLeadingOnes() >= HiBits) return VALRNG_KNOWN_SHORT; - if (Ones.countLeadingZeros() < HiBits) + if (Known.One.countLeadingZeros() < HiBits) return VALRNG_LIKELY_LONG; // Long integer divisions are often used in hashtable implementations. It's diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 82552684b832..ed72099ec3ed 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } /// \brief Build a set of blocks to extract if the input blocks are viable. -template -static SetVector buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector::iterator I = std::next(Result.begin()), @@ -106,23 +108,17 @@ static SetVector buildExtractionBlockSet(IteratorT BBBegin, return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. 
-static SetVector -buildExtractionBlockSet(ArrayRef BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} /// definedInRegion - Return true if the specified value is defined in the @@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { @@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); + PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 8c5442762643..d3002c5fb750 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); - unsigned TrailZ = KnownZero.countTrailingOnes(); + KnownBits Known(BitWidth); + computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + unsigned TrailZ = Known.Zero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. @@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl &Ops, int64_t Offset) { } } -/// Prepend \p DIExpr with a deref and offset operation. +enum { WithStackValue = true }; + +/// Prepend \p DIExpr with a deref and offset operation and optionally turn it +/// into a stack value. static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset) { - if (!Deref && !Offset) + bool Deref, int64_t Offset = 0, + bool StackValue = false) { + if (!Deref && !Offset && !StackValue) return DIExpr; - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector Ops; + + SmallVector Ops; + appendOffset(Ops, Offset); if (Deref) Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); if (DIExpr) - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + for (auto Op : DIExpr->expr_ops()) { + // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. + if (StackValue) { + if (Op.getOp() == dwarf::DW_OP_stack_value) + StackValue = false; + else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + Ops.push_back(dwarf::DW_OP_stack_value); + StackValue = false; + } + } + Ops.push_back(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + Ops.push_back(Op.getArg(I)); + } + if (StackValue) + Ops.push_back(dwarf::DW_OP_stack_value); return Builder.createExpression(Ops); } @@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) { unsigned BitWidth = M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); APInt Offset(BitWidth, 0); - // Rewrite a constant GEP into a DIExpression. + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. 
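The Local.cpp changes above extend prependDIExpr so a constant-offset GEP can be folded into a variable's DIExpression, and the comment just quoted explains why the rewritten expression must end in DW_OP_stack_value: it now computes the variable's value rather than its address. A simplified sketch of the operand ordering performed in the code that follows, using a local enum instead of the llvm::dwarf constants and flattening the operand/argument structure that the real expr_ops iterator exposes:

```cpp
#include <cstdint>
#include <vector>

// Illustrative opcode values only; the real code uses llvm::dwarf constants.
enum DwarfOpSketch : uint64_t {
  OpPlusUConst = 1, // stands in for the offset arithmetic
  OpDeref = 2,
  OpStackValue = 3,
  OpFragment = 4,
};

// New offset/deref operations go in front of the existing expression, and a
// single DW_OP_stack_value is placed at the end, but still before any
// trailing DW_OP_LLVM_fragment.
std::vector<uint64_t> prependSketch(const std::vector<uint64_t> &OldOps,
                                    uint64_t Offset, bool Deref,
                                    bool StackValue) {
  std::vector<uint64_t> Ops;
  if (Offset) {
    Ops.push_back(OpPlusUConst);
    Ops.push_back(Offset);
  }
  if (Deref)
    Ops.push_back(OpDeref);
  for (uint64_t Op : OldOps) {
    if (StackValue) {
      if (Op == OpStackValue)
        StackValue = false;          // already present, don't add a second one
      else if (Op == OpFragment) {
        Ops.push_back(OpStackValue); // must precede the fragment marker
        StackValue = false;
      }
    }
    Ops.push_back(Op);
  }
  if (StackValue)
    Ops.push_back(OpStackValue);
  return Ops;
}
```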
if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - // GEP offsets are i32 and thus alwaus fit into an int64_t. - DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue()); + // GEP offsets are i32 and thus always fit into an int64_t. + DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), + WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0); + DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 3c669ce644e2..43ab725b0769 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + // The current loop unroll pass can only unroll loops with a single latch + // that's a conditional branch exiting the loop. + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast(LatchBlock->getTerminator()); @@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + auto CheckSuccessors = [&](unsigned S1, unsigned S2) { + return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + }; + + if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { + DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" + " exiting the loop can be unrolled\n"); + return false; + } + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index b375d51005d5..8959e77438e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); + // Don't handle unreachable blocks. If there are successors with phis, this + // would leave them behind with missing predecessors. + if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) || + CurBlock->getSinglePredecessor() == CurBlock) { + DeleteList.insert(CurBlock); + return; + } + // If there is only the default destination, just branch. 
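The LoopUnroll hunk above makes the precondition explicit: the current unroller only handles a latch that is a conditional branch with one successor re-entering the header and the other exiting the loop. A minimal sketch of that CheckSuccessors test, with placeholder types standing in for llvm::BranchInst and llvm::Loop:

```cpp
struct Block;
struct Branch {
  Block *Succ[2]; // the two successors of a conditional branch
};

// True if one successor is the loop header and the other leaves the loop,
// matching the CheckSuccessors lambda in the hunk above.
bool isUnrollableLatch(const Branch &BI, const Block *Header,
                       bool (*LoopContains)(const Block *)) {
  auto Check = [&](unsigned S1, unsigned S2) {
    return BI.Succ[S1] == Header && !LoopContains(BI.Succ[S2]);
  };
  return Check(0, 1) || Check(1, 0);
}
```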
if (!SI->getNumCases()) { BranchInst::Create(Default, CurBlock); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 2f575b9d5027..f86e97b6cc72 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -60,6 +60,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. + if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || @@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { if (!isa(I)) return false; - SmallSet TrivialUnwindBlocks; + SmallSetVector TrivialUnwindBlocks; auto *PhiLPInst = cast(RI->getValue()); // Check incoming blocks to see if any of them are trivial. 
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + KnownBits Known(Bits); + computeKnownBits(Cond, Known, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) @@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, SmallVector DeadCases; for (auto &Case : SI->cases()) { APInt CaseVal = Case.getCaseValue()->getValue(); - if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); @@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, bool HasDefault = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (KnownZero | KnownOne).countPopulation(); + Bits - (Known.Zero | Known.One).countPopulation(); assert(NumUnknownBits <= Bits); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index f6070868de44..27373427d4f7 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -35,10 +35,8 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of redundant instructions removed"); -static bool runImpl(Function &F, const DominatorTree *DT, - const TargetLibraryInfo *TLI, AssumptionCache *AC, +static bool runImpl(Function &F, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { - const DataLayout &DL = F.getParent()->getDataLayout(); SmallPtrSet S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) { + if (Value *V = + SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast(U)); @@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, Changed = true; } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { + if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { // RecursivelyDeleteTriviallyDeadInstruction can remove more than one // instruction, so simply incrementing the iterator does not work. // When instructions get deleted re-iterate instead. 
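The EliminateDeadSwitchCases hunk above rephrases the dead-case test in terms of the KnownBits members: a case value is unreachable if it has a 1 bit where the condition is known to be 0, or a 0 bit where the condition is known to be 1. A self-contained sketch of that predicate over 64-bit values; the real code uses APInt at the width of the switch condition:

```cpp
#include <cassert>
#include <cstdint>

struct KnownBitsSketch {
  uint64_t Zero; // bits of the switch condition known to be 0
  uint64_t One;  // bits of the switch condition known to be 1
};

// A case value can never match the condition if it conflicts with either set
// of known bits, mirroring Known.Zero.intersects(CaseVal) and
// !Known.One.isSubsetOf(CaseVal) in the hunk above.
bool isDeadCase(const KnownBitsSketch &Known, uint64_t CaseVal) {
  bool OneWhereKnownZero = (Known.Zero & CaseVal) != 0;
  bool ZeroWhereKnownOne = (Known.One & ~CaseVal) != 0;
  return OneWhereKnownZero || ZeroWhereKnownOne;
}

int main() {
  KnownBitsSketch Known{~0xFFULL, 0x01}; // bit 0 is 1, bits 8..63 are 0
  assert(isDeadCase(Known, 0x100));      // sets a bit known to be zero
  assert(isDeadCase(Known, 0x02));       // clears bit 0, which is known one
  assert(!isDeadCase(Known, 0x03));      // consistent with the known bits
  return 0;
}
```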
@@ -113,8 +112,9 @@ namespace { &getAnalysis().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = &getAnalysis().getORE(); - - return runImpl(F, DT, TLI, AC, ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + return runImpl(F, SQ, ORE); } }; } @@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F, auto &TLI = AM.getResult(F); auto &AC = AM.getResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, &TLI, &DT, &AC); + bool Changed = runImpl(F, SQ, &ORE); if (!Changed) return PreservedAnalyses::all(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index aa71e3669ea2..2c1c30463a23 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,16 +30,13 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; -static cl::opt - ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden, - cl::desc("Treat error-reporting calls as cold")); - static cl::opt EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, cl::init(false), @@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { Value *Offset = GEP->getOperand(2); unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, - nullptr); - KnownZero.flipAllBits(); + KnownBits Known(BitWidth); + computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr); + Known.Zero.flipAllBits(); size_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); @@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { // optimize if we can prove that the program has undefined behavior when // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. - if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || (GEP->isInBounds() && isa(GEP->getOperand(0)) && NullTermIdx == ArrSize - 1)) return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), @@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, // Is the inner call really malloc()? Function *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return nullptr; + LibFunc Func; if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || Func != LibFunc_malloc) @@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, if (V == nullptr) return nullptr; + // If call isn't an intrinsic, check that it isn't within a function with the + // same name as the float version of this call. + // + // e.g. inline float expf(float val) { return (float) exp((double) val); } + // + // A similar such definition exists in the MinGW-w64 math.h header file which + // when compiled with -O2 -ffast-math causes the generation of infinite loops + // where expf is called. 
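The comment above describes the failure mode: when a libm float wrapper such as expf is itself defined as a cast-and-call of the double version, shrinking the double call back to expf inside that very function reintroduces a call to the function being compiled and produces an infinite loop. The guard added immediately below compares the caller's and callee's names; a standalone sketch of that comparison:

```cpp
#include <cassert>
#include <string>

// True if CallerName is exactly CalleeName plus a trailing 'f', e.g. caller
// "expf" wrapping callee "exp". In that case the shrinking transform bails.
bool isFloatWrapperOfCallee(const std::string &CallerName,
                            const std::string &CalleeName) {
  return CallerName.size() == CalleeName.size() + 1 &&
         CallerName.back() == 'f' &&
         CallerName.compare(0, CalleeName.size(), CalleeName) == 0;
}

int main() {
  assert(isFloatWrapperOfCallee("expf", "exp"));
  assert(!isFloatWrapperOfCallee("myexp", "exp"));
  assert(!isFloatWrapperOfCallee("exp", "exp"));
  return 0;
}
```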
+ if (!Callee->isIntrinsic()) { + const Function *F = CI->getFunction(); + StringRef FName = F->getName(); + StringRef CalleeName = Callee->getName(); + if ((FName.size() == (CalleeName.size() + 1)) && + (FName.back() == 'f') && + FName.startswith(CalleeName)) + return nullptr; + } + // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) + if (!Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 4409d7a404f8..97dcb40a1d72 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" @@ -65,7 +66,9 @@ class Vectorizer { bool run(); private: - Value *getPointerOperand(Value *I); + Value *getPointerOperand(Value *I) const; + + GetElementPtrInst *getSourceGEP(Value *Src) const; unsigned getPointerAddressSpace(Value *I); @@ -215,7 +218,7 @@ bool Vectorizer::run() { return Changed; } -Value *Vectorizer::getPointerOperand(Value *I) { +Value *Vectorizer::getPointerOperand(Value *I) const { if (LoadInst *LI = dyn_cast(I)) return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast(I)) @@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) { return -1; } +GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const { + // First strip pointer bitcasts. Make sure pointee size is the same with + // and without casts. + // TODO: a stride set by the add instruction below can match the difference + // in pointee type size here. Currently it will not be vectorized. + Value *SrcPtr = getPointerOperand(Src); + Value *SrcBase = SrcPtr->stripPointerCasts(); + if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) == + DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType())) + SrcPtr = SrcBase; + return dyn_cast(SrcPtr); +} + // FIXME: Merge with llvm::isConsecutiveAccess bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { Value *PtrA = getPointerOperand(A); @@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // Look through GEPs after checking they're the same except for the last // index. - GetElementPtrInst *GEPA = dyn_cast(getPointerOperand(A)); - GetElementPtrInst *GEPB = dyn_cast(getPointerOperand(B)); + GetElementPtrInst *GEPA = getSourceGEP(A); + GetElementPtrInst *GEPB = getSourceGEP(B); if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands()) return false; unsigned FinalIndex = GEPA->getNumOperands() - 1; @@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // If any bits are known to be zero other than the sign bit in OpA, we can // add 1 to it while guaranteeing no overflow of any sort. 
if (!Safe) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT); - KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1); - if (KnownZero != 0) + KnownBits Known(BitWidth); + computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT); + if (Known.Zero.countTrailingZeros() < (BitWidth - 1)) Safe = true; } diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7eb8fabe0b2f..87ce0194dad6 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3586,8 +3586,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, IRBuilder<> B(MiddleBlock->getTerminator()); Value *CountMinusOne = B.CreateSub( CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); - Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(), - "cast.cmo"); + Value *CMO = + !II.getStep()->getType()->isIntegerTy() + ? B.CreateCast(Instruction::SIToFP, CountMinusOne, + II.getStep()->getType()) + : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); + CMO->setName("cast.cmo"); Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); Escape->setName("ind.escape"); MissingVals[UI] = Escape; @@ -4516,14 +4520,15 @@ void InnerLoopVectorizer::predicateInstructions() { for (auto KV : PredicatedInstructions) { BasicBlock::iterator I(KV.first); BasicBlock *Head = I->getParent(); - auto *BB = SplitBlock(Head, &*std::next(I), DT, LI); auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DT, LI); I->moveBefore(T); sinkScalarOperands(&*I); - I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if"); - BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue"); + BasicBlock *PredicatedBlock = I->getParent(); + Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName(); + PredicatedBlock->setName(BBNamePrefix + ".if"); + PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue"); // If the instruction is non-void create a Phi node at reconvergence point. if (!I->getType()->isVoidTy()) { @@ -7324,8 +7329,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); - // TODO: IF-converted IFs become selects. - return 0; + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are + // converted into select instructions. We require N - 1 selects per phi + // node, where N is the number of incoming values. 
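The cost-model comment above states that a phi in a non-header block is if-converted into a chain of selects, so its vector cost is N - 1 select costs for N incoming values; the code added right after it implements this. A small sketch of the arithmetic, with a fixed per-select cost standing in for the TTI.getCmpSelInstrCost query:

```cpp
// Cost of an if-converted phi with NumIncoming values when vectorizing with
// VF > 1: one select per incoming value beyond the first.
unsigned ifConvertedPhiCost(unsigned NumIncoming, unsigned SelectCost) {
  return (NumIncoming - 1) * SelectCost;
}

int main() {
  // A phi merging values from 3 predecessors needs 2 selects.
  return ifConvertedPhiCost(3, /*SelectCost=*/1) == 2 ? 0 : 1;
}
```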
+ if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) + return (Phi->getNumIncomingValues() - 1) * + TTI.getCmpSelInstrCost( + Instruction::Select, ToVectorTy(Phi->getType(), VF), + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + + return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: diff --git a/contrib/llvm/lib/XRay/CMakeLists.txt b/contrib/llvm/lib/XRay/CMakeLists.txt deleted file mode 100644 index 8d558209d8ee..000000000000 --- a/contrib/llvm/lib/XRay/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -add_llvm_library(LLVMXRay - InstrumentationMap.cpp - Trace.cpp - - ADDITIONAL_HEADER_DIRS - ${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT - ${LLVM_MAIN_INCLUDE_DIR}/llvm/XRay - - DEPENDS - LLVMSupport - LLVMObject - - LINK_LIBS - LLVMSupport - LLVMObject - ) diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h index 1ac2b7f802c2..9acc9b75bffb 100644 --- a/contrib/llvm/tools/clang/include/clang-c/Index.h +++ b/contrib/llvm/tools/clang/include/clang-c/Index.h @@ -5600,7 +5600,8 @@ typedef enum { CXIdxEntityLang_None = 0, CXIdxEntityLang_C = 1, CXIdxEntityLang_ObjC = 2, - CXIdxEntityLang_CXX = 3 + CXIdxEntityLang_CXX = 3, + CXIdxEntityLang_Swift = 4 } CXIdxEntityLanguage; /** diff --git a/contrib/llvm/tools/clang/include/clang/AST/CommentSema.h b/contrib/llvm/tools/clang/include/clang/AST/CommentSema.h index 6a803836e848..230e52739f24 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/CommentSema.h +++ b/contrib/llvm/tools/clang/include/clang/AST/CommentSema.h @@ -208,6 +208,10 @@ class Sema { /// \returns \c true if declaration that this comment is attached to declares /// a function pointer. bool isFunctionPointerVarDecl(); + /// \returns \c true if the declaration that this comment is attached to + /// declares a variable or a field whose type is a function or a block + /// pointer. + bool isFunctionOrBlockPointerVarLikeDecl(); bool isFunctionOrMethodVariadic(); bool isObjCMethodDecl(); bool isObjCPropertyDecl(); diff --git a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchers.h b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchers.h index fb029470c830..f11469b8fc20 100644 --- a/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/contrib/llvm/tools/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -1406,7 +1406,7 @@ const internal::VariadicDynCastAllOfMatcher< /// /// Example: Given /// \code -/// struct T {void func()}; +/// struct T {void func();}; /// T f(); /// void g(T); /// \endcode diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Diagnostic.td b/contrib/llvm/tools/clang/include/clang/Basic/Diagnostic.td index 39da0060ddbd..f25068eca132 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Diagnostic.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/Diagnostic.td @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +// See the Internals Manual, section The Diagnostics Subsystem for an overview. + // Define the diagnostic severities. class Severity { string Name = N; @@ -100,10 +102,20 @@ class SuppressInSystemHeader { class Error : Diagnostic, SFINAEFailure { bit ShowInSystemHeader = 1; } +// Warnings default to on (but can be default-off'd with DefaultIgnore). +// This is used for warnings about questionable code; warnings about +// accepted language extensions should use Extension or ExtWarn below instead. 
class Warning : Diagnostic; +// Remarks can be turned on with -R flags and provide commentary, e.g. on +// optimizer decisions. class Remark : Diagnostic; +// Extensions are warnings about accepted language extensions. +// Extension warnings are default-off but enabled by -pedantic. class Extension : Diagnostic; +// ExtWarns are warnings about accepted language extensions. +// ExtWarn warnings are default-on. class ExtWarn : Diagnostic; +// Notes can provide supplementary information on errors, warnings, and remarks. class Note : Diagnostic; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td index d95e43c10c55..f04ed8ed4ce6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1114,14 +1114,12 @@ def err_pragma_cannot_end_force_cuda_host_device : Error< } // end of Parse Issue category. let CategoryName = "Modules Issue" in { -def err_expected_module_interface_decl : Error< - "expected module declaration at start of module interface">; def err_unexpected_module_decl : Error< - "module declaration must be the first declaration in the translation unit">; + "module declaration can only appear at the top level">; def err_module_expected_ident : Error< - "expected a module name after module%select{| import}0">; -def err_unexpected_module_kind : Error< - "unexpected module kind %0; expected 'implementation' or 'partition'">; + "expected a module name after '%select{module|import}0'">; +def err_module_implementation_partition : Error< + "module partition must be declared 'export'">; def err_attribute_not_module_attr : Error< "%0 attribute cannot be applied to a module">; def err_attribute_not_import_attr : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6cb872cc27c5..b5fed1f218b6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8801,9 +8801,11 @@ def err_invalid_type_for_program_scope_var : Error< } let CategoryName = "Modules Issue" in { +def err_module_decl_in_module_map_module : Error< + "'module' declaration found while building module from module map">; def err_module_interface_implementation_mismatch : Error< - "%select{'module'|'module partition'|'module implementation'}0 declaration " - "found while %select{not |not |}0building module interface">; + "missing 'export' specifier in module declaration while " + "building module interface">; def err_current_module_name_mismatch : Error< "module name '%0' specified on command line does not match name of module">; def err_module_redefinition : Error< @@ -8846,8 +8848,13 @@ def err_module_self_import : Error< "import of module '%0' appears within same top-level module '%1'">; def err_module_import_in_implementation : Error< "@import of module '%0' in implementation of '%1'; use #import">; + +// C++ Modules TS def err_export_within_export : Error< "export declaration appears within another export declaration">; +def err_export_not_in_module_interface : Error< + "export declaration can only be used within a module interface unit after " + "the module declaration">; def ext_equivalent_internal_linkage_decl_in_modules : ExtWarn< "ambiguous use of internal linkage declaration %0 defined in multiple modules">, diff --git 
a/contrib/llvm/tools/clang/include/clang/Basic/Module.h b/contrib/llvm/tools/clang/include/clang/Basic/Module.h index 8fcb0e8b056a..28aa7db52992 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Module.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/Module.h @@ -62,6 +62,18 @@ class Module { /// \brief The location of the module definition. SourceLocation DefinitionLoc; + enum ModuleKind { + /// \brief This is a module that was defined by a module map and built out + /// of header files. + ModuleMapModule, + + /// \brief This is a C++ Modules TS module interface unit. + ModuleInterfaceUnit + }; + + /// \brief The kind of this module. + ModuleKind Kind = ModuleMapModule; + /// \brief The parent of this module. This will be NULL for the top-level /// module. Module *Parent; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def index c574045e139a..f20d326e08f8 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/Sanitizers.def @@ -47,6 +47,9 @@ SANITIZER("kernel-address", KernelAddress) // MemorySanitizer SANITIZER("memory", Memory) +// libFuzzer +SANITIZER("fuzzer", Fuzzer) + // ThreadSanitizer SANITIZER("thread", Thread) diff --git a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td index ba09bccfe1c5..63348c094c93 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td @@ -573,6 +573,8 @@ def fblocks_runtime_optional : Flag<["-"], "fblocks-runtime-optional">, HelpText<"Weakly link in the blocks runtime">; def fexternc_nounwind : Flag<["-"], "fexternc-nounwind">, HelpText<"Assume all functions with C linkage do not unwind">; +def enable_split_dwarf : Flag<["-"], "enable-split-dwarf">, + HelpText<"Use split dwarf/Fission">; def split_dwarf_file : Separate<["-"], "split-dwarf-file">, HelpText<"File name to use for split dwarf debug info output">; def fno_wchar : Flag<["-"], "fno-wchar">, diff --git a/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h b/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h index 2df8077d6da6..c7b3e8006dd5 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h +++ b/contrib/llvm/tools/clang/include/clang/Driver/SanitizerArgs.h @@ -50,6 +50,7 @@ class SanitizerArgs { bool needsSharedAsanRt() const { return AsanSharedRuntime; } bool needsTsanRt() const { return Sanitizers.has(SanitizerKind::Thread); } bool needsMsanRt() const { return Sanitizers.has(SanitizerKind::Memory); } + bool needsFuzzer() const { return Sanitizers.has(SanitizerKind::Fuzzer); } bool needsLsanRt() const { return Sanitizers.has(SanitizerKind::Leak) && !Sanitizers.has(SanitizerKind::Address); diff --git a/contrib/llvm/tools/clang/include/clang/Format/Format.h b/contrib/llvm/tools/clang/include/clang/Format/Format.h index 05daf4f897f4..9bed253baca2 100644 --- a/contrib/llvm/tools/clang/include/clang/Format/Format.h +++ b/contrib/llvm/tools/clang/include/clang/Format/Format.h @@ -1512,6 +1512,18 @@ llvm::Expected cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, const FormatStyle &Style); +/// \brief Represents the status of a formatting attempt. +struct FormattingAttemptStatus { + /// \brief A value of ``false`` means that any of the affected ranges were not + /// formatted due to a non-recoverable syntax error. 
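The ModuleKind distinction above, together with the reworded module diagnostics earlier in this patch, tracks the C++ Modules TS syntax. An interface unit that satisfies those diagnostics, sketched here for illustration only (built with -fmodules-ts; the module name and file name are made up), looks like:

    // math.cppm -- Modules TS interface unit. The module-declaration must be
    // the first declaration at the top level, and 'export' is what makes this
    // an interface unit; omitting it while building the interface is what the
    // new "missing 'export' specifier" diagnostic reports.
    export module math;

    export int square(int x) { return x * x; }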
+ bool FormatComplete = true; + + /// \brief If ``FormatComplete`` is false, ``Line`` records a one-based + /// original line number at which a syntax error might have occurred. This is + /// based on a best-effort analysis and could be imprecise. + unsigned Line = 0; +}; + /// \brief Reformats the given \p Ranges in \p Code. /// /// Each range is extended on either end to its next bigger logic unit, i.e. @@ -1521,13 +1533,20 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, /// Returns the ``Replacements`` necessary to make all \p Ranges comply with /// \p Style. /// -/// If ``IncompleteFormat`` is non-null, its value will be set to true if any -/// of the affected ranges were not formatted due to a non-recoverable syntax -/// error. +/// If ``Status`` is non-null, its value will be populated with the status of +/// this formatting attempt. See \c FormattingAttemptStatus. tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName = "", - bool *IncompleteFormat = nullptr); + FormattingAttemptStatus *Status = nullptr); + +/// \brief Same as above, except if ``IncompleteFormat`` is non-null, its value +/// will be set to true if any of the affected ranges were not formatted due to +/// a non-recoverable syntax error. +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName, + bool *IncompleteFormat); /// \brief Clean up any erroneous/redundant code in the given \p Ranges in \p /// Code. diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def index 9565b22e7059..0b7d466ab3d4 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def +++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def @@ -199,6 +199,7 @@ CODEGENOPT(DebugTypeExtRefs, 1, 0) ///< Whether or not debug info should contain CODEGENOPT(DebugExplicitImport, 1, 0) ///< Whether or not debug info should ///< contain explicit imports for ///< anonymous namespaces +CODEGENOPT(EnableSplitDwarf, 1, 0) ///< Whether to enable split DWARF CODEGENOPT(SplitDwarfInlining, 1, 1) ///< Whether to include inlining info in the ///< skeleton CU to allow for symbolication ///< of inline stack frames without .dwo files. diff --git a/contrib/llvm/tools/clang/include/clang/Index/IndexSymbol.h b/contrib/llvm/tools/clang/include/clang/Index/IndexSymbol.h index bc34938fb405..abb132f9e4ce 100644 --- a/contrib/llvm/tools/clang/include/clang/Index/IndexSymbol.h +++ b/contrib/llvm/tools/clang/include/clang/Index/IndexSymbol.h @@ -59,6 +59,7 @@ enum class SymbolLanguage { C, ObjC, CXX, + Swift, }; /// Language specific sub-kinds. diff --git a/contrib/llvm/tools/clang/include/clang/Index/USRGeneration.h b/contrib/llvm/tools/clang/include/clang/Index/USRGeneration.h index 61f2c9d1ff13..8c661bd63cd8 100644 --- a/contrib/llvm/tools/clang/include/clang/Index/USRGeneration.h +++ b/contrib/llvm/tools/clang/include/clang/Index/USRGeneration.h @@ -30,10 +30,14 @@ static inline StringRef getUSRSpacePrefix() { bool generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf); /// \brief Generate a USR fragment for an Objective-C class. 
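The reformat() overloads above replace the old bool IncompleteFormat out-parameter with FormattingAttemptStatus. A minimal caller of the new form, illustrative only (formatSnippet is a made-up helper and error handling is elided):

    #include "clang/Format/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    // Format a whole buffer and report whether a syntax error cut formatting
    // short, using the status struct instead of a bare bool.
    static void formatSnippet(llvm::StringRef Code) {
      clang::format::FormatStyle Style = clang::format::getLLVMStyle();
      std::vector<clang::tooling::Range> Ranges = {
          clang::tooling::Range(0, Code.size())};
      clang::format::FormattingAttemptStatus Status;
      clang::tooling::Replacements Replaces =
          clang::format::reformat(Style, Code, Ranges, "<stdin>", &Status);
      if (!Status.FormatComplete)
        llvm::errs() << "formatting incomplete; first problem near line "
                     << Status.Line << "\n";
      (void)Replaces; // normally applied via tooling::applyAllReplacements
    }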
-void generateUSRForObjCClass(StringRef Cls, raw_ostream &OS); +void generateUSRForObjCClass(StringRef Cls, raw_ostream &OS, + StringRef ExtSymbolDefinedIn = "", + StringRef CategoryContextExtSymbolDefinedIn = ""); /// \brief Generate a USR fragment for an Objective-C class category. -void generateUSRForObjCCategory(StringRef Cls, StringRef Cat, raw_ostream &OS); +void generateUSRForObjCCategory(StringRef Cls, StringRef Cat, raw_ostream &OS, + StringRef ClsExtSymbolDefinedIn = "", + StringRef CatExtSymbolDefinedIn = ""); /// \brief Generate a USR fragment for an Objective-C instance variable. The /// complete USR can be created by concatenating the USR for the @@ -48,7 +52,15 @@ void generateUSRForObjCMethod(StringRef Sel, bool IsInstanceMethod, void generateUSRForObjCProperty(StringRef Prop, bool isClassProp, raw_ostream &OS); /// \brief Generate a USR fragment for an Objective-C protocol. -void generateUSRForObjCProtocol(StringRef Prot, raw_ostream &OS); +void generateUSRForObjCProtocol(StringRef Prot, raw_ostream &OS, + StringRef ExtSymbolDefinedIn = ""); + +/// Generate USR fragment for a global (non-nested) enum. +void generateUSRForGlobalEnum(StringRef EnumName, raw_ostream &OS, + StringRef ExtSymbolDefinedIn = ""); + +/// Generate a USR fragment for an enum constant. +void generateUSRForEnumConstant(StringRef EnumConstantName, raw_ostream &OS); /// \brief Generate a USR for a macro, including the USR prefix. /// diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index bd68842c9f73..0f16cb975b2a 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -1934,7 +1934,8 @@ class Sema { /// The parser has processed a module-declaration that begins the definition /// of a module interface or implementation. - DeclGroupPtrTy ActOnModuleDecl(SourceLocation ModuleLoc, ModuleDeclKind MDK, + DeclGroupPtrTy ActOnModuleDecl(SourceLocation StartLoc, + SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path); /// \brief The parser has processed a module import declaration. @@ -8326,6 +8327,12 @@ class Sema { /// Returns OpenMP nesting level for current directive. unsigned getOpenMPNestingLevel() const; + /// Push new OpenMP function region for non-capturing function. + void pushOpenMPFunctionRegion(); + + /// Pop OpenMP function region for non-capturing function. + void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI); + /// Checks if a type or a declaration is disabled due to the owning extension /// being disabled, and emits diagnostic messages if it is disabled. /// \param D type or declaration to be checked. diff --git a/contrib/llvm/tools/clang/lib/AST/Comment.cpp b/contrib/llvm/tools/clang/lib/AST/Comment.cpp index 7a7d3dd8304e..dfa2a1665d40 100644 --- a/contrib/llvm/tools/clang/lib/AST/Comment.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Comment.cpp @@ -116,6 +116,9 @@ bool ParagraphComment::isWhitespaceNoCache() const { static TypeLoc lookThroughTypedefOrTypeAliasLocs(TypeLoc &SrcTL) { TypeLoc TL = SrcTL.IgnoreParens(); + // Look through attribute types. + if (AttributedTypeLoc AttributeTL = TL.getAs()) + return AttributeTL.getModifiedLoc(); // Look through qualified types. 
if (QualifiedTypeLoc QualifiedTL = TL.getAs()) return QualifiedTL.getUnqualifiedLoc(); @@ -280,8 +283,25 @@ void DeclInfo::fill() { case Decl::EnumConstant: case Decl::ObjCIvar: case Decl::ObjCAtDefsField: + case Decl::ObjCProperty: { + const TypeSourceInfo *TSI; + if (const auto *VD = dyn_cast(CommentDecl)) + TSI = VD->getTypeSourceInfo(); + else if (const auto *PD = dyn_cast(CommentDecl)) + TSI = PD->getTypeSourceInfo(); + else + TSI = nullptr; + if (TSI) { + TypeLoc TL = TSI->getTypeLoc().getUnqualifiedLoc(); + FunctionTypeLoc FTL; + if (getFunctionTypeLoc(TL, FTL)) { + ParamVars = FTL.getParams(); + ReturnType = FTL.getReturnLoc().getType(); + } + } Kind = VariableKind; break; + } case Decl::Namespace: Kind = NamespaceKind; break; diff --git a/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp b/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp index d39a9b26b2a8..403454d3ab7e 100644 --- a/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp +++ b/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp @@ -86,7 +86,7 @@ ParamCommandComment *Sema::actOnParamCommandStart( new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, CommandMarker); - if (!isFunctionDecl()) + if (!isFunctionDecl() && !isFunctionOrBlockPointerVarLikeDecl()) Diag(Command->getLocation(), diag::warn_doc_param_not_attached_to_a_function_decl) << CommandMarker @@ -584,7 +584,11 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) { assert(ThisDeclInfo && "should not call this check on a bare comment"); - if (isFunctionDecl()) { + // We allow the return command for all @properties because it can be used + // to document the value that the property getter returns. + if (isObjCPropertyDecl()) + return; + if (isFunctionDecl() || isFunctionOrBlockPointerVarLikeDecl()) { if (ThisDeclInfo->ReturnType->isVoidType()) { unsigned DiagKind; switch (ThisDeclInfo->CommentDecl->getKind()) { @@ -610,8 +614,6 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) { } return; } - else if (isObjCPropertyDecl()) - return; Diag(Command->getLocation(), diag::warn_doc_returns_not_attached_to_a_function_decl) @@ -844,6 +846,30 @@ bool Sema::isFunctionPointerVarDecl() { return false; } +bool Sema::isFunctionOrBlockPointerVarLikeDecl() { + if (!ThisDeclInfo) + return false; + if (!ThisDeclInfo->IsFilled) + inspectThisDecl(); + if (ThisDeclInfo->getKind() != DeclInfo::VariableKind || + !ThisDeclInfo->CurrentDecl) + return false; + QualType QT; + if (const auto *VD = dyn_cast(ThisDeclInfo->CurrentDecl)) + QT = VD->getType(); + else if (const auto *PD = + dyn_cast(ThisDeclInfo->CurrentDecl)) + QT = PD->getType(); + else + return false; + // We would like to warn about the 'returns'/'param' commands for + // variables that don't directly specify the function type, so type aliases + // can be ignored. 
+ if (QT->getAs()) + return false; + return QT->isFunctionPointerType() || QT->isBlockPointerType(); +} + bool Sema::isObjCPropertyDecl() { if (!ThisDeclInfo) return false; diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp index 094e8dcff088..0f2558e24ba5 100644 --- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp @@ -2251,6 +2251,14 @@ bool VarDecl::checkInitIsICE() const { return Eval->IsICE; } +template +static DeclT *getDefinitionOrSelf(DeclT *D) { + assert(D); + if (auto *Def = D->getDefinition()) + return Def; + return D; +} + VarDecl *VarDecl::getTemplateInstantiationPattern() const { // If it's a variable template specialization, find the template or partial // specialization from which it was instantiated. @@ -2262,7 +2270,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { break; VTD = NewVTD; } - return VTD->getTemplatedDecl()->getDefinition(); + return getDefinitionOrSelf(VTD->getTemplatedDecl()); } if (auto *VTPSD = From.dyn_cast()) { @@ -2271,7 +2279,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { break; VTPSD = NewVTPSD; } - return VTPSD->getDefinition(); + return getDefinitionOrSelf(VTPSD); } } @@ -2280,23 +2288,18 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { VarDecl *VD = getInstantiatedFromStaticDataMember(); while (auto *NewVD = VD->getInstantiatedFromStaticDataMember()) VD = NewVD; - return VD->getDefinition(); + return getDefinitionOrSelf(VD); } } if (VarTemplateDecl *VarTemplate = getDescribedVarTemplate()) { - while (VarTemplate->getInstantiatedFromMemberTemplate()) { if (VarTemplate->isMemberSpecialization()) break; VarTemplate = VarTemplate->getInstantiatedFromMemberTemplate(); } - assert((!VarTemplate->getTemplatedDecl() || - !isTemplateInstantiation(getTemplateSpecializationKind())) && - "couldn't find pattern for variable instantiation"); - - return VarTemplate->getTemplatedDecl(); + return getDefinitionOrSelf(VarTemplate->getTemplatedDecl()); } return nullptr; } @@ -3198,9 +3201,12 @@ bool FunctionDecl::isTemplateInstantiation() const { FunctionDecl *FunctionDecl::getTemplateInstantiationPattern() const { // Handle class scope explicit specialization special case. - if (getTemplateSpecializationKind() == TSK_ExplicitSpecialization) - return getClassScopeSpecializationPattern(); - + if (getTemplateSpecializationKind() == TSK_ExplicitSpecialization) { + if (auto *Spec = getClassScopeSpecializationPattern()) + return getDefinitionOrSelf(Spec); + return nullptr; + } + // If this is a generic lambda call operator specialization, its // instantiation pattern is always its primary template's pattern // even if its primary template was instantiated from another @@ -3212,16 +3218,10 @@ FunctionDecl *FunctionDecl::getTemplateInstantiationPattern() const { if (isGenericLambdaCallOperatorSpecialization( dyn_cast(this))) { - assert(getPrimaryTemplate() && "A generic lambda specialization must be " - "generated from a primary call operator " - "template"); - assert(getPrimaryTemplate()->getTemplatedDecl()->getBody() && - "A generic lambda call operator template must always have a body - " - "even if instantiated from a prototype (i.e. 
as written) member " - "template"); - return getPrimaryTemplate()->getTemplatedDecl(); + assert(getPrimaryTemplate() && "not a generic lambda call operator?"); + return getDefinitionOrSelf(getPrimaryTemplate()->getTemplatedDecl()); } - + if (FunctionTemplateDecl *Primary = getPrimaryTemplate()) { while (Primary->getInstantiatedFromMemberTemplate()) { // If we have hit a point where the user provided a specialization of @@ -3230,11 +3230,14 @@ FunctionDecl *FunctionDecl::getTemplateInstantiationPattern() const { break; Primary = Primary->getInstantiatedFromMemberTemplate(); } - - return Primary->getTemplatedDecl(); + + return getDefinitionOrSelf(Primary->getTemplatedDecl()); } - - return getInstantiatedFromMemberFunction(); + + if (auto *MFD = getInstantiatedFromMemberFunction()) + return getDefinitionOrSelf(MFD); + + return nullptr; } FunctionTemplateDecl *FunctionDecl::getPrimaryTemplate() const { @@ -3778,7 +3781,7 @@ EnumDecl *EnumDecl::getTemplateInstantiationPattern() const { EnumDecl *ED = getInstantiatedFromMemberEnum(); while (auto *NewED = ED->getInstantiatedFromMemberEnum()) ED = NewED; - return ED; + return getDefinitionOrSelf(ED); } } diff --git a/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp b/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp index 2e5cec9c108f..dd8f768c5711 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp @@ -1364,6 +1364,13 @@ CXXRecordDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK) { } const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { + auto GetDefinitionOrSelf = + [](const CXXRecordDecl *D) -> const CXXRecordDecl * { + if (auto *Def = D->getDefinition()) + return Def; + return D; + }; + // If it's a class template specialization, find the template or partial // specialization from which it was instantiated. if (auto *TD = dyn_cast(this)) { @@ -1374,7 +1381,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { break; CTD = NewCTD; } - return CTD->getTemplatedDecl()->getDefinition(); + return GetDefinitionOrSelf(CTD->getTemplatedDecl()); } if (auto *CTPSD = From.dyn_cast()) { @@ -1383,7 +1390,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { break; CTPSD = NewCTPSD; } - return CTPSD->getDefinition(); + return GetDefinitionOrSelf(CTPSD); } } @@ -1392,7 +1399,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { const CXXRecordDecl *RD = this; while (auto *NewRD = RD->getInstantiatedFromMemberClass()) RD = NewRD; - return RD->getDefinition(); + return GetDefinitionOrSelf(RD); } } diff --git a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp index 9218eb537a60..a12a38033c4a 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp @@ -539,9 +539,18 @@ void ObjCInterfaceDecl::getDesignatedInitializers( bool ObjCInterfaceDecl::isDesignatedInitializer(Selector Sel, const ObjCMethodDecl **InitMethod) const { + bool HasCompleteDef = isThisDeclarationADefinition(); + // During deserialization the data record for the ObjCInterfaceDecl could + // be made invariant by reusing the canonical decl. Take this into account + // when checking for the complete definition. + if (!HasCompleteDef && getCanonicalDecl()->hasDefinition() && + getCanonicalDecl()->getDefinition() == getDefinition()) + HasCompleteDef = true; + // Check for a complete definition and recover if not so. 
- if (!isThisDeclarationADefinition()) + if (!HasCompleteDef) return false; + if (data().ExternallyCompleted) LoadExternalDefinition(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index 20059d922f90..85788b427208 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -129,16 +129,20 @@ class EmitAssemblyHelper { // that we add to the PassManagerBuilder. class PassManagerBuilderWrapper : public PassManagerBuilder { public: - PassManagerBuilderWrapper(const CodeGenOptions &CGOpts, + PassManagerBuilderWrapper(const Triple &TargetTriple, + const CodeGenOptions &CGOpts, const LangOptions &LangOpts) - : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {} + : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts), + LangOpts(LangOpts) {} + const Triple &getTargetTriple() const { return TargetTriple; } const CodeGenOptions &getCGOpts() const { return CGOpts; } const LangOptions &getLangOpts() const { return LangOpts; } + private: + const Triple &TargetTriple; const CodeGenOptions &CGOpts; const LangOptions &LangOpts; }; - } static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { @@ -185,16 +189,35 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, PM.add(createSanitizerCoverageModulePass(Opts)); } +// Check if ASan should use GC-friendly instrumentation for globals. +// First of all, there is no point if -fdata-sections is off (expect for MachO, +// where this is not a factor). Also, on ELF this feature requires an assembler +// extension that only works with -integrated-as at the moment. +static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { + switch (T.getObjectFormat()) { + case Triple::MachO: + case Triple::COFF: + return true; + case Triple::ELF: + return CGOpts.DataSections && !CGOpts.DisableIntegratedAS; + default: + return false; + } +} + static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { const PassManagerBuilderWrapper &BuilderWrapper = static_cast(Builder); + const Triple &T = BuilderWrapper.getTargetTriple(); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); + PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, + UseGlobalsGC)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -407,6 +430,8 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); + if (CodeGenOpts.EnableSplitDwarf) + Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; @@ -434,8 +459,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (CodeGenOpts.DisableLLVMPasses) return; - PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts); - // Figure out TargetLibraryInfo. 
This needs to be added to MPM and FPM // manually (and not via PMBuilder), since some passes (eg. InstrProfiling) // are inserted before PMBuilder ones - they'd get the default-constructed @@ -444,6 +467,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr TLII( createTLII(TargetTriple, CodeGenOpts)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); + // At O0 and O1 we only run the always inliner which is more efficient. At // higher optimization levels we run the normal inliner. if (CodeGenOpts.OptimizationLevel <= 1) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 2f6a2b95fb61..dd32a44393c6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -528,12 +528,14 @@ void CGDebugInfo::CreateCompileUnit() { // Create new compile unit. // FIXME - Eliminate TheCU. TheCU = DBuilder.createCompileUnit( - LangTag, DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSKind, - Checksum), + LangTag, + DBuilder.createFile(remapDIPath(MainFileName), + remapDIPath(getCurrentDirname()), CSKind, Checksum), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */, - CGM.getCodeGenOpts().SplitDwarfInlining, + CGM.getCodeGenOpts().EnableSplitDwarf + ? "" + : CGM.getCodeGenOpts().SplitDwarfFile, + EmissionKind, 0 /* DWOid */, CGM.getCodeGenOpts().SplitDwarfInlining, CGM.getCodeGenOpts().DebugInfoForProfiling); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index 719147a58e08..d0aacf65428f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -533,15 +533,6 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { SanOpts.has(SanitizerKind::Vptr); } -/// Check if a runtime null check for \p Ptr can be omitted. -static bool canOmitPointerNullCheck(llvm::Value *Ptr) { - // Note: do not perform any constant-folding in this function. That is best - // left to the IR builder. - - // Pointers to alloca'd memory are non-null. - return isa(Ptr->stripPointerCastsNoFollowAliases()); -} - void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, CharUnits Alignment, @@ -560,11 +551,16 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, SmallVector, 3> Checks; llvm::BasicBlock *Done = nullptr; + // Quickly determine whether we have a pointer to an alloca. It's possible + // to skip null checks, and some alignment checks, for these pointers. This + // can reduce compile-time significantly. + auto PtrToAlloca = + dyn_cast(Ptr->stripPointerCastsNoFollowAliases()); + bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast || TCK == TCK_UpcastToVirtualBase; if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) && - !SkippedChecks.has(SanitizerKind::Null) && - !canOmitPointerNullCheck(Ptr)) { + !SkippedChecks.has(SanitizerKind::Null) && !PtrToAlloca) { // The glvalue must not be an empty glvalue. llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr); @@ -617,7 +613,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); // The glvalue must be suitably aligned. 
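The EmitTypeCheck change above (continued in the next hunk) lets codegen skip -fsanitize=null, and some -fsanitize=alignment, instrumentation when the checked pointer is visibly a local alloca. A small case, shown for illustration only, where every such check becomes statically unnecessary:

    struct Widget { int Id; };

    // Compiled with -fsanitize=null,alignment: &W names a local alloca, so it
    // is trivially non-null and its alignment already satisfies the type's
    // requirement; no runtime type check is emitted for the access below.
    int makeWidget() {
      Widget W{17};
      return W.Id;
    }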
- if (AlignVal > 1) { + if (AlignVal > 1 && + (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) { llvm::Value *Align = Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy), llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 874b6a69e513..d1a706b8821e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2466,16 +2466,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, return Schedule | Modifier; } -void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, - SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, llvm::Value *UB, - llvm::Value *Chunk) { +void CGOpenMPRuntime::emitForDispatchInit( + CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, + bool Ordered, const DispatchRTInput &DispatchValues) { if (!CGF.HaveInsertPoint()) return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + OpenMPSchedType Schedule = getRuntimeSchedule( + ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && @@ -2486,14 +2484,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. - if (Chunk == nullptr) - Chunk = CGF.Builder.getIntN(IVSize, 1); + llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk + : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index 7901a6b7a8fc..6f460f121791 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -672,16 +672,50 @@ class CGOpenMPRuntime { /// virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; + /// struct with the values to be passed to the dispatch runtime function + struct DispatchRTInput { + /// Loop lower bound + llvm::Value *LB = nullptr; + /// Loop upper bound + llvm::Value *UB = nullptr; + /// Chunk size specified using 'schedule' clause (nullptr if chunk + /// was not specified) + llvm::Value *Chunk = nullptr; + DispatchRTInput() = default; + DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk) + : LB(LB), UB(UB), Chunk(Chunk) {} + }; + + /// Call the appropriate runtime routine to initialize it before start + /// of loop. + + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime + /// routine before start of the OpenMP loop to get the loop upper / lower + /// bounds \a LB and \a UB and stride \a ST. 
+ /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the interation variable. + /// \param Ordered true if loop is ordered, false otherwise. + /// \param DispatchValues struct containing llvm values for lower bound, upper + /// bound, and chunk expression. + /// For the default (nullptr) value, the chunk 1 will be used. + /// virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - llvm::Value *UB, - llvm::Value *Chunk = nullptr); + const DispatchRTInput &DispatchValues); /// \brief Call the appropriate runtime routine to initialize it before start /// of loop. /// - /// Depending on the loop schedule, it is nesessary to call some runtime + /// This is used only in case of static schedule, when the user did not + /// specify a ordered clause on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime /// routine before start of the OpenMP loop to get the loop upper / lower /// bounds \a LB and \a UB and stride \a ST. /// diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index 22269e42c7a0..f738dd0750fa 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -87,7 +87,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { class OMPParallelScope final : public OMPLexicalScope { bool EmitPreInitStmt(const OMPExecutableDirective &S) { OpenMPDirectiveKind Kind = S.getDirectiveKind(); - return !isOpenMPTargetExecutionDirective(Kind) && + return !(isOpenMPTargetExecutionDirective(Kind) || + isOpenMPLoopBoundSharingDirective(Kind)) && isOpenMPParallelDirective(Kind); } @@ -1249,10 +1250,20 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) { +namespace { +/// Codegen lambda for appending distribute lower and upper bounds to outlined +/// parallel function. This is necessary for combined constructs such as +/// 'distribute parallel for' +typedef llvm::function_ref &)> + CodeGenBoundParametersTy; +} // anonymous namespace + +static void emitCommonOMPParallelDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1279,11 +1290,20 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OMPParallelScope Scope(CGF, S); llvm::SmallVector CapturedVars; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk + // lower and upper bounds with the pragma 'for' chunking mechanism. 
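A minimal source-level construct that exercises this bound-sharing path, shown for illustration only (the map clauses are just to keep the example self-contained):

    // Each team receives a 'distribute' chunk; the inner 'parallel for' then
    // workshares only that chunk, which is why the distribute lower and upper
    // bounds are forwarded as extra parameters of the outlined function.
    void saxpy(float A, const float *X, float *Y, int N) {
      #pragma omp target teams distribute parallel for \
          map(to: X[0:N]) map(tofrom: Y[0:N]) schedule(static, 64)
      for (int I = 0; I < N; ++I)
        Y[I] = A * X[I] + Y[I];
    }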
+ // The following lambda takes care of appending the lower and upper bound + // parameters when necessary + CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } +static void emitEmptyBoundParameters(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl &) {} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1304,7 +1324,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1649,6 +1670,13 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } +static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); +}; + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1731,9 +1759,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPOuterLoop( + bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, + CodeGenFunction::OMPPrivateScope &LoopScope, + const CodeGenFunction::OMPLoopArguments &LoopArgs, + const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, + const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1751,15 +1782,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) - EmitIgnoredExpr(S.getEnsureUpperBound()); + // UB = min(UB, GlobalUB) or + // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. + // 'distribute parallel for') + EmitIgnoredExpr(LoopArgs.EUB); // IV = LB - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // IV < UB - BoolCondVal = EvaluateExprAsBool(S.getCond()); + BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, - LB, UB, ST); + BoolCondVal = + RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1779,7 +1813,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). 
if (DynamicOrOrdered) - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1793,24 +1827,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( - CGF, Loc, IVSize, IVSigned); - } - }); + + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 'distribute parallel for') + // while (idx <= UB) { ; idx += ST; } + EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(S.getNextLowerBound()); - EmitIgnoredExpr(S.getNextUpperBound()); + EmitIgnoredExpr(LoopArgs.NextLB); + EmitIgnoredExpr(LoopArgs.NextUB); } EmitBranch(CondBlock); @@ -1829,7 +1866,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1838,7 +1876,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - /*Chunked=*/Chunk != nullptr)) && + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. 
@@ -1896,22 +1934,46 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); + llvm::Value *LBVal = DispatchBounds.first; + llvm::Value *UBVal = DispatchBounds.second; + CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, + LoopArgs.Chunk}; RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, UBVal, Chunk); + IVSigned, Ordered, DipatchRTInputValues); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, IL, LB, UB, ST, Chunk); + Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); } - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, - ST, IL, Chunk); + auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, + const unsigned IVSize, + const bool IVSigned) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, + IVSigned); + } + }; + + OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); + OuterLoopArgs.IncExpr = S.getInc(); + OuterLoopArgs.Init = S.getInit(); + OuterLoopArgs.Cond = S.getCond(); + OuterLoopArgs.NextLB = S.getNextLowerBound(); + OuterLoopArgs.NextUB = S.getNextUpperBound(); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, + emitOMPLoopBodyWithStopPoint, CodeGenOrdered); } +static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, + const unsigned IVSize, const bool IVSigned) {} + void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent) { auto &RT = CGM.getOpenMPRuntime(); @@ -1924,26 +1986,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL, LB, UB, ST, Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, + IVSigned, /* Ordered = */ false, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.Chunk); - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, - S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); + // for combined 'distribute' and 'for' the increment expression of distribute + // is store in DistInc. For 'distribute' alone, it is in Inc. 
+ Expr *IncExpr; + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) + IncExpr = S.getDistInc(); + else + IncExpr = S.getInc(); + + // this routine is shared by 'omp distribute parallel for' and + // 'omp distribute': select the right EUB expression depending on the + // directive + OMPLoopArguments OuterLoopArgs; + OuterLoopArgs.LB = LoopArgs.LB; + OuterLoopArgs.UB = LoopArgs.UB; + OuterLoopArgs.ST = LoopArgs.ST; + OuterLoopArgs.IL = LoopArgs.IL; + OuterLoopArgs.Chunk = LoopArgs.Chunk; + OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound(); + OuterLoopArgs.IncExpr = IncExpr; + OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit(); + OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextLowerBound() + : S.getNextLowerBound(); + OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextUpperBound() + : S.getNextUpperBound(); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, + LoopScope, OuterLoopArgs, CodeGenLoopContent, + emitEmptyOrdered); +} + +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +static std::pair +emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast(S); + LValue LB = + EmitOMPHelperVar(CGF, cast(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast(LS.getUpperBoundVariable())); + + // When composing 'distribute' with 'for' (e.g. as in 'distribute + // parallel for') we need to use the 'distribute' + // chunk lower and upper bounds rather than the whole loop iteration + // space. These are parameters to the outlined function for 'parallel' + // and we copy the bounds of the previous schedule into the + // the current ones. + LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); + LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); + llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); + PrevLBVal = CGF.EmitScalarConversion( + PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); + PrevUBVal = CGF.EmitScalarConversion( + PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + + CGF.EmitStoreOfScalar(PrevLBVal, LB); + CGF.EmitStoreOfScalar(PrevUBVal, UB); + + return {LB, UB}; +} + +/// if the 'for' loop has a dispatch schedule (e.g. 
dynamic, guided) then +/// we need to use the LB and UB expressions generated by the worksharing +/// code generation support, whereas in non combined situations we would +/// just emit 0 and the LastIteration expression +/// This function is necessary due to the difference of the LB and UB +/// types for the RT emission routines for 'for_static_init' and +/// 'for_dispatch_init' +static std::pair +emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast(S); + const Expr *IVExpr = LS.getIterationVariable(); + // when implementing a dynamic schedule for a 'for' combined with a + // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop + // is not normalized as each team only executes its own assigned + // distribute chunk + QualType IteratorTy = IVExpr->getType(); + llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + return {LBVal, UBVal}; +}; + +static void emitDistributeParallelForDistributeInnerBoundParams( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + llvm::SmallVectorImpl &CapturedVars) { + const auto &Dir = cast(S); + LValue LB = + CGF.EmitLValue(cast(Dir.getCombinedLowerBoundVariable())); + auto LBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(LBCast); + LValue UB = + CGF.EmitLValue(cast(Dir.getCombinedUpperBoundVariable())); + + auto UBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(UBCast); +}; + +static void +emitInnerParallelForWhenCombined(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), + emitDistributeParallelForInnerBounds, + emitDistributeParallelForDispatchBounds); + }; + + emitCommonOMPParallelDirective( + CGF, S, OMPD_for, CGInlinedWorksharingLoop, + emitDistributeParallelForDistributeInnerBoundParams); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGF.EmitStmt( - cast(S.getAssociatedStmt())->getCapturedStmt()); - }); + OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + /*HasCancel=*/false); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -2081,14 +2276,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } -/// \brief Emit a helper variable and return corresponding lvalue. 
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2101,7 +2288,10 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +bool CodeGenFunction::EmitOMPWorksharingLoop( + const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); @@ -2151,10 +2341,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); + + std::pair Bounds = CodeGenLoopBounds(*this, S); + LValue LB = Bounds.first; + LValue UB = Bounds.second; LValue ST = EmitOMPHelperVar(*this, cast(S.getStrideVariable())); LValue IL = @@ -2240,9 +2430,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. + const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), + ST.getAddress(), IL.getAddress(), + Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + LoopArguments, CGDispatchBounds); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2280,12 +2472,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { return HasLastprivateClause; } +/// The following two functions generate expressions for the loop lower +/// and upper bounds in case of static and dynamic (dispatch) schedule +/// of the associated 'for' or 'distribute' loop. +static std::pair +emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast(S); + LValue LB = + EmitOMPHelperVar(CGF, cast(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast(LS.getUpperBoundVariable())); + return {LB, UB}; +} + +/// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not +/// consider the lower and upper bound expressions generated by the +/// worksharing loop support, but we use 0 and the iteration space size as +/// constants +static std::pair +emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast(S); + const Expr *IVExpr = LS.getIterationVariable(); + const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); + llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); + llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); + return {LBVal, UBVal}; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2303,7 +2525,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2554,9 +2778,11 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2564,9 +2790,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
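// Illustrative sketch, not part of this change: for a dispatch (e.g. dynamic)
// schedule the runtime hands out chunks of the whole iteration space, so the
// bounds fed to the dispatch-init call are the constants produced by
// emitDispatchForLoopBounds above (zero and the last iteration), not the
// per-thread lower/upper bound helper variables used by static schedules.
void scale(int n, float *v) {
#pragma omp parallel for schedule(dynamic, 64)
  for (int i = 0; i < n; ++i)
    v[i] *= 2.0f;
}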
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2576,7 +2804,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -2794,7 +3023,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, + Expr *IncExpr) { // Emit the loop iteration variable. auto IVExpr = cast(S.getIterationVariable()); auto IVDecl = cast(IVExpr->getDecl()); @@ -2835,10 +3066,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit 'then' code. { // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast(S.getUpperBoundVariable())); + + LValue LB = EmitOMPHelperVar( + *this, cast( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedLowerBoundVariable() + : S.getLowerBoundVariable()))); + LValue UB = EmitOMPHelperVar( + *this, cast( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedUpperBoundVariable() + : S.getUpperBoundVariable()))); LValue ST = EmitOMPHelperVar(*this, cast(S.getStrideVariable())); LValue IL = @@ -2890,15 +3128,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit()); + + Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + + // for distribute alone, codegen // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + // when combined with 'for' (e.g. as in 'distribute parallel for') + // while (idx <= UB) { ; idx += ST; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); @@ -2907,9 +3155,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. 
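// Self-contained sketch of that chunked pattern (illustrative only; the
// emitted code asks the OpenMP runtime for each [LB, UB] chunk instead of
// computing it inline as done here):
#include <algorithm>
#include <cstdio>

static void runChunks(int GlobalUB, int Chunk, int NumThreads, int Tid) {
  // Static, chunked schedule: thread 'Tid' takes every NumThreads-th chunk.
  for (int LB = Tid * Chunk; LB <= GlobalUB; LB += NumThreads * Chunk) {
    int UB = std::min(LB + Chunk - 1, GlobalUB); // UB = min(UB, GlobalUB)
    for (int IV = LB; IV <= UB; ++IV)            // inner loop over one chunk
      std::printf("thread %d runs iteration %d\n", Tid, IV);
  }
}

int main() {
  const int NumThreads = 4;
  for (int Tid = 0; Tid < NumThreads; ++Tid)
    runChunks(/*GlobalUB=*/9, /*Chunk=*/2, NumThreads, Tid);
  return 0;
}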
- EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + const OMPLoopArguments LoopArguments = { + LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), + Chunk}; + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, + CodeGenLoop); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -2931,7 +3181,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S); + + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -3840,7 +4091,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index fa72019eb08b..1ded824ba5b0 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -175,6 +175,25 @@ class CodeGenFunction : public CodeGenTypeCache { // because of jumps. VarBypassDetector Bypasses; + // CodeGen lambda for loops and support for ordered clause + typedef llvm::function_ref + CodeGenLoopTy; + typedef llvm::function_ref + CodeGenOrderedTy; + + // Codegen lambda for loop bounds in worksharing loop constructs + typedef llvm::function_ref( + CodeGenFunction &, const OMPExecutableDirective &S)> + CodeGenLoopBoundsTy; + + // Codegen lambda for loop bounds in dispatch-based loop implementation + typedef llvm::function_ref( + CodeGenFunction &, const OMPExecutableDirective &S, Address LB, + Address UB)> + CodeGenDispatchBoundsTy; + /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -2756,7 +2775,6 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); - void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); void EmitOMPDistributeParallelForSimdDirective( @@ -2813,32 +2831,78 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope); + /// Helper for the OpenMP loop directives. + void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); + + /// \brief Emit code for the worksharing loop-based directive. + /// \return true, if this construct has any lastprivate clause, false - + /// otherwise. 
+ bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds); + private: /// Helpers for blocks llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// Helpers for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref &CondGen); - /// \brief Emit code for the worksharing loop-based directive. - /// \return true, if this construct has any lastprivate clause, false - - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); - void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + + void EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); + + /// struct with the values to be passed to the OpenMP loop-related functions + struct OMPLoopArguments { + /// loop lower bound + Address LB = Address::invalid(); + /// loop upper bound + Address UB = Address::invalid(); + /// loop stride + Address ST = Address::invalid(); + /// isLastIteration argument for runtime functions + Address IL = Address::invalid(); + /// Chunk value generated by sema + llvm::Value *Chunk = nullptr; + /// EnsureUpperBound + Expr *EUB = nullptr; + /// IncrementExpression + Expr *IncExpr = nullptr; + /// Loop initialization + Expr *Init = nullptr; + /// Loop exit condition + Expr *Cond = nullptr; + /// Update of LB after a whole chunk has been executed + Expr *NextLB = nullptr; + /// Update of UB after a whole chunk has been executed + Expr *NextUB = nullptr; + OMPLoopArguments() = default; + OMPLoopArguments(Address LB, Address UB, Address ST, Address IL, + llvm::Value *Chunk = nullptr, Expr *EUB = nullptr, + Expr *IncExpr = nullptr, Expr *Init = nullptr, + Expr *Cond = nullptr, Expr *NextLB = nullptr, + Expr *NextUB = nullptr) + : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB), + IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB), + NextUB(NextUB) {} + }; + void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoop, + const CodeGenOrderedTy &CodeGenOrdered); void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, Address LB, - Address UB, Address ST, Address IL, - llvm::Value *Chunk); - void EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + OMPPrivateScope &LoopScope, bool Ordered, + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds); + void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind, + const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent); /// \brief Emit code for sections directive. 
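// Self-contained sketch of the callback-parameterization idea behind the new
// CodeGenLoopBoundsTy / CodeGenDispatchBoundsTy arguments (std::function
// stands in for llvm::function_ref; the real callbacks return LValues and
// llvm::Values, and the numbers below are made up):
#include <functional>
#include <iostream>
#include <utility>

using BoundsFn = std::function<std::pair<int, int>(int LastIteration)>;

static void emitLoop(int LastIteration, const BoundsFn &Bounds) {
  std::pair<int, int> B = Bounds(LastIteration);
  std::cout << "loop over [" << B.first << ", " << B.second << "]\n";
}

int main() {
  // 'for'-style bounds: whatever the directive's helper variables hold.
  emitLoop(100, [](int) { return std::make_pair(0, 24); });
  // dispatch-style bounds: the constants 0 and the iteration-space size.
  emitLoop(100, [](int N) { return std::make_pair(0, N); });
  return 0;
}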
void EmitSections(const OMPExecutableDirective &S); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 19203973ff1b..25d32f19d0e5 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -565,12 +565,8 @@ void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { - llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - auto *MetaDataNode = dyn_cast(MD); - // Check if we have to wrap MDString in MDNode. - if (!MetaDataNode) - MetaDataNode = llvm::MDNode::get(getLLVMContext(), MD); - I->setMetadata(llvm::LLVMContext::MD_invariant_group, MetaDataNode); + I->setMetadata(llvm::LLVMContext::MD_invariant_group, + llvm::MDNode::get(getLLVMContext(), {})); } void CodeGenModule::Error(SourceLocation loc, StringRef message) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index 6acedc033a6e..9e193531d0f6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -666,7 +666,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { } bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { - if (SkipCoverageMapping) + if (!D->getBody()) return true; // Don't map the functions in system headers. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h index 0026df570bce..0759e65388b8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h @@ -40,14 +40,11 @@ class CodeGenPGO { std::unique_ptr ProfRecord; std::vector RegionCounts; uint64_t CurrentRegionCount; - /// \brief A flag that is set to true when this function doesn't need - /// to have coverage mapping data. - bool SkipCoverageMapping; public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), - FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {} + : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), + CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been diff --git a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp index 8e61aadbf326..c9561367a3a8 100644 --- a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp @@ -265,6 +265,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, Add &= ~InvalidTrappingKinds; Add &= Supported; + // Enable coverage if the fuzzing flag is set. 
+ if (Add & Fuzzer) + CoverageFeatures |= CoverageTracePCGuard | CoverageIndirCall | CoverageTraceCmp; + Kinds |= Add; } else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) { Arg->claim(); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp index 49708e7d7242..6e1e4ccf44f0 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -2778,8 +2778,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-split-dwarf-inlining"); if (DebugInfoKind == codegenoptions::NoDebugInfo) DebugInfoKind = codegenoptions::LimitedDebugInfo; - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-split-dwarf=Enable"); + CmdArgs.push_back("-enable-split-dwarf"); } // After we've dealt with all combinations of things that could diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp index 93b66eb6954a..5e360f62e21a 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -577,6 +577,17 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, StaticRuntimes.push_back("esan"); } +static void addLibFuzzerRuntime(const ToolChain &TC, + const ArgList &Args, + ArgStringList &CmdArgs) { + StringRef ParentDir = llvm::sys::path::parent_path(TC.getDriver().InstalledDir); + SmallString<128> P(ParentDir); + llvm::sys::path::append(P, "lib", "libLLVMFuzzer.a"); + CmdArgs.push_back(Args.MakeArgString(P)); + TC.AddCXXStdlibLibArgs(Args, CmdArgs); +} + + // Should be called before we add system libraries (C++ ABI, libstdc++/libc++, // C runtime, etc). Returns true if sanitizer system deps need to be linked in. bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, @@ -586,6 +597,11 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols); + // Inject libfuzzer dependencies. + if (TC.getSanitizerArgs().needsFuzzer()) { + addLibFuzzerRuntime(TC, Args, CmdArgs); + } + for (auto RT : SharedRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false); for (auto RT : HelperStaticRuntimes) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp index 009a12da3015..e41b50c40b28 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp @@ -930,6 +930,18 @@ void MachO::AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs, } } +void MachO::AddFuzzerLinkArgs(const ArgList &Args, ArgStringList &CmdArgs) const { + + // Go up one directory from Clang to find the libfuzzer archive file. + StringRef ParentDir = llvm::sys::path::parent_path(getDriver().InstalledDir); + SmallString<128> P(ParentDir); + llvm::sys::path::append(P, "lib", "libLLVMFuzzer.a"); + CmdArgs.push_back(Args.MakeArgString(P)); + + // Libfuzzer is written in C++ and requires libcxx. 
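// Illustrative only, not part of this change: a minimal fuzz target of the
// kind the new fuzzer sanitizer support builds and links against the
// libLLVMFuzzer.a archive located above.
#include <cstddef>
#include <cstdint>

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size >= 3 && Data[0] == 'F' && Data[1] == 'U' && Data[2] == 'Z')
    __builtin_trap(); // give the fuzzer something to find
  return 0;
}
// Assumed invocation for the flag wired up here: clang++ -fsanitize=fuzzer fuzz.cc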
+ AddCXXStdlibLibArgs(Args, CmdArgs); +} + StringRef Darwin::getPlatformFamily() const { switch (TargetPlatform) { case DarwinPlatformKind::MacOS: @@ -1035,10 +1047,14 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, const SanitizerArgs &Sanitize = getSanitizerArgs(); if (Sanitize.needsAsanRt()) AddLinkSanitizerLibArgs(Args, CmdArgs, "asan"); + if (Sanitize.needsLsanRt()) + AddLinkSanitizerLibArgs(Args, CmdArgs, "lsan"); if (Sanitize.needsUbsanRt()) AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan"); if (Sanitize.needsTsanRt()) AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan"); + if (Sanitize.needsFuzzer()) + AddFuzzerLinkArgs(Args, CmdArgs); if (Sanitize.needsStatsRt()) { StringRef OS = isTargetMacOS() ? "osx" : "iossim"; AddLinkRuntimeLib(Args, CmdArgs, @@ -1892,6 +1908,8 @@ SanitizerMask Darwin::getSupportedSanitizers() const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; + Res |= SanitizerKind::Leak; + Res |= SanitizerKind::Fuzzer; if (isTargetMacOS()) { if (!isMacosxVersionLT(10, 9)) Res |= SanitizerKind::Vptr; diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.h index 984f8ef0c41f..16ed04286ac0 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.h +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.h @@ -154,6 +154,8 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain { /// Add the linker arguments to link the compiler runtime library. virtual void AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + virtual void AddFuzzerLinkArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; virtual void addStartObjectFileArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp index 313bf38c2860..d29d826b5f44 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1747,7 +1747,9 @@ bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const { static const char *const ARMTriples[] = {"arm-linux-gnueabi", "arm-linux-androideabi"}; static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf", - "armv7hl-redhat-linux-gnueabi"}; + "armv7hl-redhat-linux-gnueabi", + "armv6hl-suse-linux-gnueabi", + "armv7hl-suse-linux-gnueabi"}; static const char *const ARMebLibDirs[] = {"/lib"}; static const char *const ARMebTriples[] = {"armeb-linux-gnueabi", "armeb-linux-androideabi"}; diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp index c143b7f89868..1d7bcf8e4df0 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -402,6 +402,40 @@ Tool *HexagonToolChain::buildLinker() const { return new tools::hexagon::Linker(*this); } +unsigned HexagonToolChain::getOptimizationLevel( + const llvm::opt::ArgList &DriverArgs) const { + // Copied in large part from lib/Frontend/CompilerInvocation.cpp. 
+ Arg *A = DriverArgs.getLastArg(options::OPT_O_Group); + if (!A) + return 0; + + if (A->getOption().matches(options::OPT_O0)) + return 0; + if (A->getOption().matches(options::OPT_Ofast) || + A->getOption().matches(options::OPT_O4)) + return 3; + assert(A->getNumValues() != 0); + StringRef S(A->getValue()); + if (S == "s" || S == "z" || S.empty()) + return 2; + if (S == "g") + return 1; + + unsigned OptLevel; + if (S.getAsInteger(10, OptLevel)) + return 0; + return OptLevel; +} + +void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_ffp_contract)) + return; + unsigned OptLevel = getOptimizationLevel(DriverArgs); + if (OptLevel >= 3) + CC1Args.push_back("-ffp-contract=fast"); +} + void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc) || diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.h index fb50ba3c72c3..78f97a3d59fd 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.h +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.h @@ -61,11 +61,15 @@ class LLVM_LIBRARY_VISIBILITY HexagonToolChain : public Linux { Tool *buildAssembler() const override; Tool *buildLinker() const override; + unsigned getOptimizationLevel(const llvm::opt::ArgList &DriverArgs) const; + public: HexagonToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); ~HexagonToolChain() override; + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp index 3ffb2f6e03e5..50443a125244 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp @@ -869,6 +869,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { llvm::Triple::thumbeb; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; + Res |= SanitizerKind::Fuzzer; Res |= SanitizerKind::KernelAddress; Res |= SanitizerKind::Vptr; Res |= SanitizerKind::SafeStack; diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 73ae10a29f8f..3adb72c11da8 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -792,7 +792,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Previous && Previous->is(tok::question)) State.Stack.back().QuestionColumn = State.Column; } - if (!Current.opensScope() && !Current.closesScope()) + if (!Current.opensScope() && !Current.closesScope() && + !Current.is(TT_PointerOrReference)) State.LowestLevelOnLine = std::min(State.LowestLevelOnLine, Current.NestingLevel); if (Current.isMemberAccess()) diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 0e2da71343d5..f55a623a8d1f 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -908,8 +908,8 @@ class JavaScriptRequoter : public TokenAnalyzer { class Formatter 
: public TokenAnalyzer { public: Formatter(const Environment &Env, const FormatStyle &Style, - bool *IncompleteFormat) - : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {} + FormattingAttemptStatus *Status) + : TokenAnalyzer(Env, Style), Status(Status) {} tooling::Replacements analyze(TokenAnnotator &Annotator, @@ -931,7 +931,7 @@ class Formatter : public TokenAnalyzer { Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - IncompleteFormat) + Env.getSourceManager(), Status) .format(AnnotatedLines); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) @@ -1013,7 +1013,7 @@ class Formatter : public TokenAnalyzer { } bool BinPackInconclusiveFunctions; - bool *IncompleteFormat; + FormattingAttemptStatus *Status; }; // This class clean up the erroneous/redundant code around the given ranges in @@ -1830,7 +1830,8 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, - StringRef FileName, bool *IncompleteFormat) { + StringRef FileName, + FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) return tooling::Replacements(); @@ -1846,11 +1847,11 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, auto NewEnv = Environment::CreateVirtualEnvironment( *NewCode, FileName, tooling::calculateRangesAfterReplacements(Fixes, Ranges)); - Formatter Format(*NewEnv, Expanded, IncompleteFormat); + Formatter Format(*NewEnv, Expanded, Status); return Fixes.merge(Format.process()); } } - Formatter Format(*Env, Expanded, IncompleteFormat); + Formatter Format(*Env, Expanded, Status); return Format.process(); }; @@ -1866,7 +1867,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, return reformatAfterApplying(Requoter); } - Formatter Format(*Env, Expanded, IncompleteFormat); + Formatter Format(*Env, Expanded, Status); return Format.process(); } @@ -1879,6 +1880,16 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, return Clean.process(); } +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName, bool *IncompleteFormat) { + FormattingAttemptStatus Status; + auto Result = reformat(Style, Code, Ranges, FileName, &Status); + if (!Status.FormatComplete) + *IncompleteFormat = true; + return Result; +} + tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h index c9649126d93f..3b3600fede97 100644 --- a/contrib/llvm/tools/clang/lib/Format/FormatToken.h +++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h @@ -617,10 +617,12 @@ struct AdditionalKeywords { kw_finally = &IdentTable.get("finally"); kw_from = &IdentTable.get("from"); kw_function = &IdentTable.get("function"); + kw_get = &IdentTable.get("get"); kw_import = &IdentTable.get("import"); kw_is = &IdentTable.get("is"); kw_let = &IdentTable.get("let"); kw_module = &IdentTable.get("module"); + kw_set = &IdentTable.get("set"); kw_type = &IdentTable.get("type"); kw_var = &IdentTable.get("var"); kw_yield = &IdentTable.get("yield"); @@ -675,10 +677,12 @@ struct AdditionalKeywords { IdentifierInfo *kw_finally; IdentifierInfo *kw_from; IdentifierInfo *kw_function; + 
IdentifierInfo *kw_get; IdentifierInfo *kw_import; IdentifierInfo *kw_is; IdentifierInfo *kw_let; IdentifierInfo *kw_module; + IdentifierInfo *kw_set; IdentifierInfo *kw_type; IdentifierInfo *kw_var; IdentifierInfo *kw_yield; diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index bbc2d1e52b63..c274d7bf07f8 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -1120,7 +1120,11 @@ class AnnotatingParser { Current.Type = TT_FunctionAnnotationRParen; } } - } else if (Current.is(tok::at) && Current.Next) { + } else if (Current.is(tok::at) && Current.Next && + Style.Language != FormatStyle::LK_JavaScript && + Style.Language != FormatStyle::LK_Java) { + // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it + // marks declarations and properties that need special formatting. switch (Current.Next->Tok.getObjCKeywordID()) { case tok::objc_interface: case tok::objc_implementation: @@ -2541,9 +2545,11 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { const FormatToken *NonComment = Right.getPreviousNonComment(); if (NonComment && - NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw, Keywords.kw_interface, - Keywords.kw_type)) + NonComment->isOneOf( + tok::kw_return, tok::kw_continue, tok::kw_break, tok::kw_throw, + Keywords.kw_interface, Keywords.kw_type, tok::kw_static, + tok::kw_public, tok::kw_private, tok::kw_protected, + Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set)) return false; // Otherwise a semicolon is inserted. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp index c3c154afeb8a..8ff893426e25 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -835,8 +835,11 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, bool ShouldFormat = TheLine.Affected || FixIndentation; // We cannot format this line; if the reason is that the line had a // parsing error, remember that. - if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat) - *IncompleteFormat = true; + if (ShouldFormat && TheLine.Type == LT_Invalid && Status) { + Status->FormatComplete = false; + Status->Line = + SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation()); + } if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h index 93247f71d6e0..55f0d1cac689 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h @@ -32,9 +32,11 @@ class UnwrappedLineFormatter { WhitespaceManager *Whitespaces, const FormatStyle &Style, const AdditionalKeywords &Keywords, - bool *IncompleteFormat) + const SourceManager &SourceMgr, + FormattingAttemptStatus *Status) : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - Keywords(Keywords), IncompleteFormat(IncompleteFormat) {} + Keywords(Keywords), SourceMgr(SourceMgr), + Status(Status) {} /// \brief Format the current block and return the penalty. 
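// Minimal sketch of using the new status-reporting entry point (assumes the
// reformat() overload and FormattingAttemptStatus fields shown in this change
// are exposed through clang/Format/Format.h):
#include "clang/Format/Format.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static std::string formatBuffer(llvm::StringRef Code) {
  clang::format::FormatStyle Style = clang::format::getLLVMStyle();
  clang::format::FormattingAttemptStatus Status;
  clang::tooling::Replacements Replaces = clang::format::reformat(
      Style, Code, {clang::tooling::Range(0, Code.size())}, "<stdin>", &Status);
  if (!Status.FormatComplete)
    llvm::errs() << "could not format line " << Status.Line << "\n";
  llvm::Expected<std::string> Changed =
      clang::tooling::applyAllReplacements(Code, Replaces);
  if (!Changed) {
    llvm::consumeError(Changed.takeError());
    return Code.str();
  }
  return *Changed;
}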
unsigned format(const SmallVectorImpl &Lines, @@ -63,7 +65,8 @@ class UnwrappedLineFormatter { WhitespaceManager *Whitespaces; const FormatStyle &Style; const AdditionalKeywords &Keywords; - bool *IncompleteFormat; + const SourceManager &SourceMgr; + FormattingAttemptStatus *Status; }; } // end namespace format } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 0e0eb40eb334..8cdb8298ee9e 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -519,6 +519,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.MacroDebugInfo = Args.hasArg(OPT_debug_info_macro); Opts.WholeProgramVTables = Args.hasArg(OPT_fwhole_program_vtables); Opts.LTOVisibilityPublicStd = Args.hasArg(OPT_flto_visibility_public_std); + Opts.EnableSplitDwarf = Args.hasArg(OPT_enable_split_dwarf); Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file); Opts.SplitDwarfInlining = !Args.hasArg(OPT_fno_split_dwarf_inlining); Opts.DebugTypeExtRefs = Args.hasArg(OPT_dwarf_ext_refs); diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp index 88be7732f519..0dd04e8a8fff 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp @@ -882,14 +882,16 @@ static void InitializePredefinedMacros(const TargetInfo &TI, // The value written by __atomic_test_and_set. // FIXME: This is target-dependent. Builder.defineMacro("__GCC_ATOMIC_TEST_AND_SET_TRUEVAL", "1"); + } + auto addLockFreeMacros = [&](const llvm::Twine &Prefix) { // Used by libc++ and libstdc++ to implement ATOMIC__LOCK_FREE. 
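// Illustrative only: user code keeps using the standard macro names; after
// this change <stdatomic.h> maps them to the __CLANG_ATOMIC_* predefines,
// while the __GCC_ATOMIC_* spellings are still emitted for libc++/libstdc++
// outside MSVC-compatible mode. A value of 2 means always lock-free on the
// target.
#include <atomic>
#include <cstdio>

int main() {
  std::printf("ATOMIC_INT_LOCK_FREE = %d\n", ATOMIC_INT_LOCK_FREE);
  std::printf("ATOMIC_POINTER_LOCK_FREE = %d\n", ATOMIC_POINTER_LOCK_FREE);
  std::atomic<int> Counter{0};
  Counter.fetch_add(1);
  std::printf("Counter = %d\n", Counter.load());
  return 0;
}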
unsigned InlineWidthBits = TI.getMaxAtomicInlineWidth(); -#define DEFINE_LOCK_FREE_MACRO(TYPE, Type) \ - Builder.defineMacro("__GCC_ATOMIC_" #TYPE "_LOCK_FREE", \ - getLockFreeValue(TI.get##Type##Width(), \ - TI.get##Type##Align(), \ - InlineWidthBits)); +#define DEFINE_LOCK_FREE_MACRO(TYPE, Type) \ + Builder.defineMacro(Prefix + #TYPE "_LOCK_FREE", \ + getLockFreeValue(TI.get##Type##Width(), \ + TI.get##Type##Align(), \ + InlineWidthBits)); DEFINE_LOCK_FREE_MACRO(BOOL, Bool); DEFINE_LOCK_FREE_MACRO(CHAR, Char); DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16); @@ -899,12 +901,15 @@ static void InitializePredefinedMacros(const TargetInfo &TI, DEFINE_LOCK_FREE_MACRO(INT, Int); DEFINE_LOCK_FREE_MACRO(LONG, Long); DEFINE_LOCK_FREE_MACRO(LLONG, LongLong); - Builder.defineMacro("__GCC_ATOMIC_POINTER_LOCK_FREE", + Builder.defineMacro(Prefix + "POINTER_LOCK_FREE", getLockFreeValue(TI.getPointerWidth(0), TI.getPointerAlign(0), InlineWidthBits)); #undef DEFINE_LOCK_FREE_MACRO - } + }; + addLockFreeMacros("__CLANG_ATOMIC_"); + if (!LangOpts.MSVCCompat) + addLockFreeMacros("__GCC_ATOMIC_"); if (LangOpts.NoInlineDefine) Builder.defineMacro("__NO_INLINE__"); diff --git a/contrib/llvm/tools/clang/lib/Headers/stdatomic.h b/contrib/llvm/tools/clang/lib/Headers/stdatomic.h index 23bb3a357768..b4845a74e494 100644 --- a/contrib/llvm/tools/clang/lib/Headers/stdatomic.h +++ b/contrib/llvm/tools/clang/lib/Headers/stdatomic.h @@ -40,16 +40,16 @@ extern "C" { /* 7.17.1 Introduction */ -#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE -#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE -#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE -#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE -#define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE -#define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE -#define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE -#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE -#define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE +#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE +#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE +#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE +#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE +#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE +#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE +#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE +#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE +#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE /* 7.17.2 Initialization */ diff --git a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp index 0e893505516f..c1eed1684cbd 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp @@ -14,6 +14,13 @@ using namespace clang; using namespace index; +#define TRY_DECL(D,CALL_EXPR) \ + do { \ + if (!IndexCtx.shouldIndex(D)) return true; \ + if (!CALL_EXPR) \ + return false; \ + } while (0) + #define TRY_TO(CALL_EXPR) \ do { \ if (!CALL_EXPR) \ @@ -120,8 +127,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { D->getDeclContext(), 0); } - if (!IndexCtx.handleDecl(D, MethodLoc, Roles, Relations)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D, MethodLoc, Roles, Relations)); 
IndexCtx.indexTypeSourceInfo(D->getReturnTypeSourceInfo(), D); bool hasIBActionAndFirst = D->hasAttr(); for (const auto *I : D->parameters()) { @@ -138,6 +144,53 @@ class IndexingDeclVisitor : public ConstDeclVisitor { return true; } + /// Gather the declarations which the given declaration \D overrides in a + /// pseudo-override manner. + /// + /// Pseudo-overrides occur when a class template specialization declares + /// a declaration that has the same name as a similar declaration in the + /// non-specialized template. + void + gatherTemplatePseudoOverrides(const NamedDecl *D, + SmallVectorImpl &Relations) { + if (!IndexCtx.getLangOpts().CPlusPlus) + return; + const auto *CTSD = + dyn_cast(D->getLexicalDeclContext()); + if (!CTSD) + return; + llvm::PointerUnion + Template = CTSD->getSpecializedTemplateOrPartial(); + if (const auto *CTD = Template.dyn_cast()) { + const CXXRecordDecl *Pattern = CTD->getTemplatedDecl(); + bool TypeOverride = isa(D); + for (const NamedDecl *ND : Pattern->lookup(D->getDeclName())) { + if (const auto *CTD = dyn_cast(ND)) + ND = CTD->getTemplatedDecl(); + if (ND->isImplicit()) + continue; + // Types can override other types. + if (!TypeOverride) { + if (ND->getKind() != D->getKind()) + continue; + } else if (!isa(ND)) + continue; + if (const auto *FD = dyn_cast(ND)) { + const auto *DFD = cast(D); + // Function overrides are approximated using the number of parameters. + if (FD->getStorageClass() != DFD->getStorageClass() || + FD->getNumParams() != DFD->getNumParams()) + continue; + } + Relations.emplace_back( + SymbolRoleSet(SymbolRole::RelationOverrideOf) | + SymbolRoleSet(SymbolRole::RelationSpecializationOf), + ND); + } + } + } + bool VisitFunctionDecl(const FunctionDecl *D) { if (D->isDeleted()) return true; @@ -152,9 +205,13 @@ class IndexingDeclVisitor : public ConstDeclVisitor { Relations.emplace_back((unsigned)SymbolRole::RelationOverrideOf, *I); } } + gatherTemplatePseudoOverrides(D, Relations); + if (const auto *Base = D->getPrimaryTemplate()) + Relations.push_back( + SymbolRelation(SymbolRoleSet(SymbolRole::RelationSpecializationOf), + Base->getTemplatedDecl())); - if (!IndexCtx.handleDecl(D, Roles, Relations)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D, Roles, Relations)); handleDeclarator(D); if (const CXXConstructorDecl *Ctor = dyn_cast(D)) { @@ -189,16 +246,18 @@ class IndexingDeclVisitor : public ConstDeclVisitor { } bool VisitVarDecl(const VarDecl *D) { - if (!IndexCtx.handleDecl(D)) - return false; + SmallVector Relations; + gatherTemplatePseudoOverrides(D, Relations); + TRY_DECL(D, IndexCtx.handleDecl(D, SymbolRoleSet(), Relations)); handleDeclarator(D); IndexCtx.indexBody(D->getInit(), D); return true; } bool VisitFieldDecl(const FieldDecl *D) { - if (!IndexCtx.handleDecl(D)) - return false; + SmallVector Relations; + gatherTemplatePseudoOverrides(D, Relations); + TRY_DECL(D, IndexCtx.handleDecl(D, SymbolRoleSet(), Relations)); handleDeclarator(D); if (D->isBitField()) IndexCtx.indexBody(D->getBitWidth(), D); @@ -212,8 +271,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { // handled in VisitObjCPropertyImplDecl return true; } - if (!IndexCtx.handleDecl(D)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D)); handleDeclarator(D); return true; } @@ -224,17 +282,18 @@ class IndexingDeclVisitor : public ConstDeclVisitor { } bool VisitEnumConstantDecl(const EnumConstantDecl *D) { - if (!IndexCtx.handleDecl(D)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D)); IndexCtx.indexBody(D->getInitExpr(), D); return true; } bool 
VisitTypedefNameDecl(const TypedefNameDecl *D) { - if (!D->isTransparentTag()) - if (!IndexCtx.handleDecl(D)) - return false; - IndexCtx.indexTypeSourceInfo(D->getTypeSourceInfo(), D); + if (!D->isTransparentTag()) { + SmallVector Relations; + gatherTemplatePseudoOverrides(D, Relations); + TRY_DECL(D, IndexCtx.handleDecl(D, SymbolRoleSet(), Relations)); + IndexCtx.indexTypeSourceInfo(D->getTypeSourceInfo(), D); + } return true; } @@ -242,7 +301,9 @@ class IndexingDeclVisitor : public ConstDeclVisitor { // Non-free standing tags are handled in indexTypeSourceInfo. if (D->isFreeStanding()) { if (D->isThisDeclarationADefinition()) { - IndexCtx.indexTagDecl(D); + SmallVector Relations; + gatherTemplatePseudoOverrides(D, Relations); + IndexCtx.indexTagDecl(D, Relations); } else { auto *Parent = dyn_cast(D->getDeclContext()); return IndexCtx.handleReference(D, D->getLocation(), Parent, @@ -272,7 +333,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { bool VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D) { if (D->isThisDeclarationADefinition()) { - TRY_TO(IndexCtx.handleDecl(D)); + TRY_DECL(D, IndexCtx.handleDecl(D)); SourceLocation SuperLoc = D->getSuperClassLoc(); if (auto *SuperD = D->getSuperClass()) { bool hasSuperTypedef = false; @@ -303,7 +364,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { bool VisitObjCProtocolDecl(const ObjCProtocolDecl *D) { if (D->isThisDeclarationADefinition()) { - TRY_TO(IndexCtx.handleDecl(D)); + TRY_DECL(D, IndexCtx.handleDecl(D)); TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D, /*superLoc=*/SourceLocation())); TRY_TO(IndexCtx.indexDeclContext(D)); @@ -322,8 +383,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { if (Class->isImplicitInterfaceDecl()) IndexCtx.handleDecl(Class); - if (!IndexCtx.handleDecl(D)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D)); // Visit implicit @synthesize property implementations first as their // location is reported at the name of the @implementation block. 
This @@ -342,6 +402,8 @@ class IndexingDeclVisitor : public ConstDeclVisitor { } bool VisitObjCCategoryDecl(const ObjCCategoryDecl *D) { + if (!IndexCtx.shouldIndex(D)) + return true; const ObjCInterfaceDecl *C = D->getClassInterface(); if (!C) return true; @@ -370,8 +432,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { SourceLocation CategoryLoc = D->getCategoryNameLoc(); if (!CategoryLoc.isValid()) CategoryLoc = D->getLocation(); - if (!IndexCtx.handleDecl(D, CategoryLoc)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D, CategoryLoc)); IndexCtx.indexDeclContext(D); return true; } @@ -393,8 +454,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { if (ObjCMethodDecl *MD = D->getSetterMethodDecl()) if (MD->getLexicalDeclContext() == D->getLexicalDeclContext()) handleObjCMethod(MD, D); - if (!IndexCtx.handleDecl(D)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D)); if (IBOutletCollectionAttr *attr = D->getAttr()) IndexCtx.indexTypeSourceInfo(attr->getInterfaceLoc(), D, D->getLexicalDeclContext(), false, true); @@ -415,8 +475,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { Loc = Container->getLocation(); Roles |= (SymbolRoleSet)SymbolRole::Implicit; } - if (!IndexCtx.handleDecl(D, Loc, Roles, Relations)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D, Loc, Roles, Relations)); if (D->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) return true; @@ -450,8 +509,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { } else if (D->getLocation() == IvarLoc) { IvarRoles = (SymbolRoleSet)SymbolRole::Implicit; } - if(!IndexCtx.handleDecl(IvarD, IvarLoc, IvarRoles)) - return false; + TRY_DECL(IvarD, IndexCtx.handleDecl(IvarD, IvarLoc, IvarRoles)); } else { IndexCtx.handleReference(IvarD, D->getPropertyIvarDeclLoc(), nullptr, D->getDeclContext(), SymbolRoleSet()); @@ -461,8 +519,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { } bool VisitNamespaceDecl(const NamespaceDecl *D) { - if (!IndexCtx.handleDecl(D)) - return false; + TRY_DECL(D, IndexCtx.handleDecl(D)); IndexCtx.indexDeclContext(D); return true; } @@ -507,6 +564,9 @@ class IndexingDeclVisitor : public ConstDeclVisitor { D, SymbolRelation(SymbolRoleSet(SymbolRole::RelationSpecializationOf), SpecializationOf)); } + if (TypeSourceInfo *TSI = D->getTypeAsWritten()) + IndexCtx.indexTypeSourceInfo(TSI, /*Parent=*/nullptr, + D->getLexicalDeclContext()); return true; } diff --git a/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp b/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp index ea66b7017951..0bfa19346b4e 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp @@ -318,6 +318,20 @@ SymbolInfo index::getSymbolInfo(const Decl *D) { if (Info.Properties & (unsigned)SymbolProperty::Generic) Info.Lang = SymbolLanguage::CXX; + auto getExternalSymAttr = [](const Decl *D) -> ExternalSourceSymbolAttr* { + if (auto *attr = D->getAttr()) + return attr; + if (auto *dcd = dyn_cast(D->getDeclContext())) { + if (auto *attr = dcd->getAttr()) + return attr; + } + return nullptr; + }; + if (auto *attr = getExternalSymAttr(D)) { + if (attr->getLanguage() == "Swift") + Info.Lang = SymbolLanguage::Swift; + } + return Info; } @@ -458,6 +472,7 @@ StringRef index::getSymbolLanguageString(SymbolLanguage K) { case SymbolLanguage::C: return "C"; case SymbolLanguage::ObjC: return "ObjC"; case SymbolLanguage::CXX: return "C++"; + case SymbolLanguage::Swift: return "Swift"; } llvm_unreachable("invalid symbol language kind"); } diff --git 
a/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp b/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp index a3566a9f2ae8..44d1241fb930 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp @@ -210,6 +210,8 @@ void IndexingContext::indexNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS, void IndexingContext::indexTagDecl(const TagDecl *D, ArrayRef Relations) { + if (!shouldIndex(D)) + return; if (!shouldIndexFunctionLocalSymbols() && isFunctionLocalSymbol(D)) return; diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp index 85574d0a314d..709a23657b07 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp @@ -17,6 +17,21 @@ using namespace clang; using namespace index; +static bool isGeneratedDecl(const Decl *D) { + if (auto *attr = D->getAttr()) { + return attr->getGeneratedDeclaration(); + } + return false; +} + +bool IndexingContext::shouldIndex(const Decl *D) { + return !isGeneratedDecl(D); +} + +const LangOptions &IndexingContext::getLangOpts() const { + return Ctx->getLangOpts(); +} + bool IndexingContext::shouldIndexFunctionLocalSymbols() const { return IndexOpts.IndexFunctionLocals; } diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.h b/contrib/llvm/tools/clang/lib/Index/IndexingContext.h index 1ebf6f9ce67a..566651c83a75 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexingContext.h +++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.h @@ -48,6 +48,10 @@ class IndexingContext { void setASTContext(ASTContext &ctx) { Ctx = &ctx; } + bool shouldIndex(const Decl *D); + + const LangOptions &getLangOpts() const; + bool shouldSuppressRefs() const { return false; } diff --git a/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp b/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp index 73dddd9a8b36..ed469f677a34 100644 --- a/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp +++ b/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp @@ -46,6 +46,15 @@ static bool printLoc(llvm::raw_ostream &OS, SourceLocation Loc, return false; } +static StringRef GetExternalSourceContainer(const NamedDecl *D) { + if (!D) + return StringRef(); + if (auto *attr = D->getAttr()) { + return attr->getDefinedIn(); + } + return StringRef(); +} + namespace { class USRGenerator : public ConstDeclVisitor { SmallVectorImpl &Buf; @@ -79,7 +88,8 @@ class USRGenerator : public ConstDeclVisitor { void VisitNamespaceAliasDecl(const NamespaceAliasDecl *D); void VisitFunctionTemplateDecl(const FunctionTemplateDecl *D); void VisitClassTemplateDecl(const ClassTemplateDecl *D); - void VisitObjCContainerDecl(const ObjCContainerDecl *CD); + void VisitObjCContainerDecl(const ObjCContainerDecl *CD, + const ObjCCategoryDecl *CatD = nullptr); void VisitObjCMethodDecl(const ObjCMethodDecl *MD); void VisitObjCPropertyDecl(const ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); @@ -116,6 +126,8 @@ class USRGenerator : public ConstDeclVisitor { return D->getParentFunctionOrMethod() != nullptr; } + void GenExtSymbolContainer(const NamedDecl *D); + /// Generate the string component containing the location of the /// declaration. bool GenLoc(const Decl *D, bool IncludeOffset); @@ -127,13 +139,16 @@ class USRGenerator : public ConstDeclVisitor { /// itself. /// Generate a USR for an Objective-C class. 
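// Illustrative only: the attribute that isGeneratedDecl()/shouldIndex() and
// GetExternalSourceContainer() above inspect. Declarations generated from
// another language (for example a Swift module's generated header) are marked
// roughly like this; the module and function names here are made up.
__attribute__((external_source_symbol(language = "Swift",
                                      defined_in = "PaintKit",
                                      generated_declaration)))
void paintkit_refresh(void);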
- void GenObjCClass(StringRef cls) { - generateUSRForObjCClass(cls, Out); + void GenObjCClass(StringRef cls, StringRef ExtSymDefinedIn, + StringRef CategoryContextExtSymbolDefinedIn) { + generateUSRForObjCClass(cls, Out, ExtSymDefinedIn, + CategoryContextExtSymbolDefinedIn); } /// Generate a USR for an Objective-C class category. - void GenObjCCategory(StringRef cls, StringRef cat) { - generateUSRForObjCCategory(cls, cat, Out); + void GenObjCCategory(StringRef cls, StringRef cat, + StringRef clsExt, StringRef catExt) { + generateUSRForObjCCategory(cls, cat, Out, clsExt, catExt); } /// Generate a USR fragment for an Objective-C property. @@ -142,8 +157,8 @@ class USRGenerator : public ConstDeclVisitor { } /// Generate a USR for an Objective-C protocol. - void GenObjCProtocol(StringRef prot) { - generateUSRForObjCProtocol(prot, Out); + void GenObjCProtocol(StringRef prot, StringRef ext) { + generateUSRForObjCProtocol(prot, Out, ext); } void VisitType(QualType T); @@ -204,7 +219,11 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) { if (ShouldGenerateLocation(D) && GenLoc(D, /*IncludeOffset=*/isLocal(D))) return; + const unsigned StartSize = Buf.size(); VisitDeclContext(D->getDeclContext()); + if (Buf.size() == StartSize) + GenExtSymbolContainer(D); + bool IsTemplate = false; if (FunctionTemplateDecl *FunTmpl = D->getDescribedFunctionTemplate()) { IsTemplate = true; @@ -367,7 +386,16 @@ void USRGenerator::VisitObjCMethodDecl(const ObjCMethodDecl *D) { IgnoreResults = true; return; } - Visit(ID); + auto getCategoryContext = [](const ObjCMethodDecl *D) -> + const ObjCCategoryDecl * { + if (auto *CD = dyn_cast(D->getDeclContext())) + return CD; + if (auto *ICD = dyn_cast(D->getDeclContext())) + return ICD->getCategoryDecl(); + return nullptr; + }; + auto *CD = getCategoryContext(D); + VisitObjCContainerDecl(ID, CD); } // Ideally we would use 'GenObjCMethod', but this is such a hot path // for Objective-C code that we don't want to use @@ -376,13 +404,15 @@ void USRGenerator::VisitObjCMethodDecl(const ObjCMethodDecl *D) { << DeclarationName(D->getSelector()); } -void USRGenerator::VisitObjCContainerDecl(const ObjCContainerDecl *D) { +void USRGenerator::VisitObjCContainerDecl(const ObjCContainerDecl *D, + const ObjCCategoryDecl *CatD) { switch (D->getKind()) { default: llvm_unreachable("Invalid ObjC container."); case Decl::ObjCInterface: case Decl::ObjCImplementation: - GenObjCClass(D->getName()); + GenObjCClass(D->getName(), GetExternalSourceContainer(D), + GetExternalSourceContainer(CatD)); break; case Decl::ObjCCategory: { const ObjCCategoryDecl *CD = cast(D); @@ -402,7 +432,9 @@ void USRGenerator::VisitObjCContainerDecl(const ObjCContainerDecl *D) { GenLoc(CD, /*IncludeOffset=*/true); } else - GenObjCCategory(ID->getName(), CD->getName()); + GenObjCCategory(ID->getName(), CD->getName(), + GetExternalSourceContainer(ID), + GetExternalSourceContainer(CD)); break; } @@ -417,12 +449,16 @@ void USRGenerator::VisitObjCContainerDecl(const ObjCContainerDecl *D) { IgnoreResults = true; return; } - GenObjCCategory(ID->getName(), CD->getName()); + GenObjCCategory(ID->getName(), CD->getName(), + GetExternalSourceContainer(ID), + GetExternalSourceContainer(CD)); break; } - case Decl::ObjCProtocol: - GenObjCProtocol(cast(D)->getName()); + case Decl::ObjCProtocol: { + const ObjCProtocolDecl *PD = cast(D); + GenObjCProtocol(PD->getName(), GetExternalSourceContainer(PD)); break; + } } } @@ -452,6 +488,8 @@ void USRGenerator::VisitTagDecl(const TagDecl *D) { ShouldGenerateLocation(D) && GenLoc(D, 
/*IncludeOffset=*/isLocal(D))) return; + GenExtSymbolContainer(D); + D = D->getCanonicalDecl(); VisitDeclContext(D->getDeclContext()); @@ -544,6 +582,12 @@ void USRGenerator::VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *D) { GenLoc(D, /*IncludeOffset=*/true); } +void USRGenerator::GenExtSymbolContainer(const NamedDecl *D) { + StringRef Container = GetExternalSourceContainer(D); + if (!Container.empty()) + Out << "@M@" << Container; +} + bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) { if (generatedLoc) return IgnoreResults; @@ -866,12 +910,34 @@ void USRGenerator::VisitTemplateArgument(const TemplateArgument &Arg) { // USR generation functions. //===----------------------------------------------------------------------===// -void clang::index::generateUSRForObjCClass(StringRef Cls, raw_ostream &OS) { +static void combineClassAndCategoryExtContainers(StringRef ClsSymDefinedIn, + StringRef CatSymDefinedIn, + raw_ostream &OS) { + if (ClsSymDefinedIn.empty() && CatSymDefinedIn.empty()) + return; + if (CatSymDefinedIn.empty()) { + OS << "@M@" << ClsSymDefinedIn << '@'; + return; + } + OS << "@CM@" << CatSymDefinedIn << '@'; + if (ClsSymDefinedIn != CatSymDefinedIn) { + OS << ClsSymDefinedIn << '@'; + } +} + +void clang::index::generateUSRForObjCClass(StringRef Cls, raw_ostream &OS, + StringRef ExtSymDefinedIn, + StringRef CategoryContextExtSymbolDefinedIn) { + combineClassAndCategoryExtContainers(ExtSymDefinedIn, + CategoryContextExtSymbolDefinedIn, OS); OS << "objc(cs)" << Cls; } void clang::index::generateUSRForObjCCategory(StringRef Cls, StringRef Cat, - raw_ostream &OS) { + raw_ostream &OS, + StringRef ClsSymDefinedIn, + StringRef CatSymDefinedIn) { + combineClassAndCategoryExtContainers(ClsSymDefinedIn, CatSymDefinedIn, OS); OS << "objc(cy)" << Cls << '@' << Cat; } @@ -890,10 +956,25 @@ void clang::index::generateUSRForObjCProperty(StringRef Prop, bool isClassProp, OS << (isClassProp ? 
"(cpy)" : "(py)") << Prop; } -void clang::index::generateUSRForObjCProtocol(StringRef Prot, raw_ostream &OS) { +void clang::index::generateUSRForObjCProtocol(StringRef Prot, raw_ostream &OS, + StringRef ExtSymDefinedIn) { + if (!ExtSymDefinedIn.empty()) + OS << "@M@" << ExtSymDefinedIn << '@'; OS << "objc(pl)" << Prot; } +void clang::index::generateUSRForGlobalEnum(StringRef EnumName, raw_ostream &OS, + StringRef ExtSymDefinedIn) { + if (!ExtSymDefinedIn.empty()) + OS << "@M@" << ExtSymDefinedIn; + OS << "@E@" << EnumName; +} + +void clang::index::generateUSRForEnumConstant(StringRef EnumConstantName, + raw_ostream &OS) { + OS << '@' << EnumConstantName; +} + bool clang::index::generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf) { if (!D) diff --git a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp index 5b60ed3f812a..512d7dc5de68 100644 --- a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp @@ -581,6 +581,7 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, auto *Result = new Module(Name, Loc, nullptr, /*IsFramework*/ false, /*IsExplicit*/ false, NumCreatedModules++); + Result->Kind = Module::ModuleInterfaceUnit; Modules[Name] = SourceModule = Result; // Mark the main source file as being within the newly-created module so that diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp index f81eaa31e9e9..87e105d1d03d 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp @@ -989,9 +989,9 @@ struct PragmaDebugHandler : public PragmaHandler { #ifdef _MSC_VER #pragma warning(disable : 4717) #endif - static void DebugOverflowStack() { - void (*volatile Self)() = DebugOverflowStack; - Self(); + static void DebugOverflowStack(void (*P)() = nullptr) { + void (*volatile Self)(void(*P)()) = DebugOverflowStack; + Self(reinterpret_cast(Self)); } #ifdef _MSC_VER #pragma warning(default : 4717) diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp index ad45ab114fde..3e02e46ddc7d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp @@ -339,7 +339,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { ColonLoc = Tok.getLocation(); } } - + // Code completion for the right-hand side of an assignment expression // goes through a special hook that takes the left-hand side into account. if (Tok.is(tok::code_completion) && NextTokPrec == prec::Assignment) { @@ -347,7 +347,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { cutOffParsing(); return ExprError(); } - + // Parse another leaf here for the RHS of the operator. // ParseCastExpression works here because all RHS expressions in C have it // as a prefix, at least. However, in C++, an assignment-expression could @@ -456,6 +456,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { if (!getLangOpts().CPlusPlus) continue; } + // Ensure potential typos aren't left undiagnosed. 
if (LHS.isInvalid()) { Actions.CorrectDelayedTyposInExpr(OrigLHS); diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp index edbfc636bc46..38476ce7e15d 100644 --- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp @@ -534,23 +534,6 @@ void Parser::LateTemplateParserCleanupCallback(void *P) { } bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) { - // C++ Modules TS: module-declaration must be the first declaration in the - // file. (There can be no preceding preprocessor directives, but we expect - // the lexer to check that.) - if (Tok.is(tok::kw_module)) { - Result = ParseModuleDecl(); - return false; - } else if (getLangOpts().getCompilingModule() == - LangOptions::CMK_ModuleInterface) { - // FIXME: We avoid providing this diagnostic when generating an object file - // from an existing PCM file. This is not a good way to detect this - // condition; we should provide a mechanism to indicate whether we've - // already parsed a declaration in this translation unit and avoid calling - // ParseFirstTopLevelDecl in that case. - if (Actions.TUKind == TU_Module) - Diag(Tok, diag::err_expected_module_interface_decl); - } - // C11 6.9p1 says translation units must have at least one top-level // declaration. C++ doesn't have this restriction. We also don't want to // complain if we have a precompiled header, although technically if the PCH @@ -583,6 +566,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result) { Result = ParseModuleImport(SourceLocation()); return false; + case tok::kw_export: + if (NextToken().isNot(tok::kw_module)) + break; + LLVM_FALLTHROUGH; + case tok::kw_module: + Result = ParseModuleDecl(); + return false; + case tok::annot_module_include: Actions.ActOnModuleInclude(Tok.getLocation(), reinterpret_cast( @@ -2049,30 +2040,28 @@ void Parser::ParseMicrosoftIfExistsExternalDeclaration() { /// Parse a C++ Modules TS module declaration, which appears at the beginning /// of a module interface, module partition, or module implementation file. /// -/// module-declaration: [Modules TS + P0273R0] -/// 'module' module-kind[opt] module-name attribute-specifier-seq[opt] ';' -/// module-kind: -/// 'implementation' -/// 'partition' +/// module-declaration: [Modules TS + P0273R0 + P0629R0] +/// 'export'[opt] 'module' 'partition'[opt] +/// module-name attribute-specifier-seq[opt] ';' /// -/// Note that the module-kind values are context-sensitive keywords. +/// Note that 'partition' is a context-sensitive keyword. Parser::DeclGroupPtrTy Parser::ParseModuleDecl() { - assert(Tok.is(tok::kw_module) && getLangOpts().ModulesTS && - "should not be parsing a module declaration"); + SourceLocation StartLoc = Tok.getLocation(); + + Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export) + ? Sema::ModuleDeclKind::Module + : Sema::ModuleDeclKind::Implementation; + + assert(Tok.is(tok::kw_module) && "not a module declaration"); SourceLocation ModuleLoc = ConsumeToken(); - // Check for a module-kind. 
- Sema::ModuleDeclKind MDK = Sema::ModuleDeclKind::Module; - if (Tok.is(tok::identifier) && NextToken().is(tok::identifier)) { - if (Tok.getIdentifierInfo()->isStr("implementation")) - MDK = Sema::ModuleDeclKind::Implementation; - else if (Tok.getIdentifierInfo()->isStr("partition")) - MDK = Sema::ModuleDeclKind::Partition; - else { - Diag(Tok, diag::err_unexpected_module_kind) << Tok.getIdentifierInfo(); - SkipUntil(tok::semi); - return nullptr; - } + if (Tok.is(tok::identifier) && NextToken().is(tok::identifier) && + Tok.getIdentifierInfo()->isStr("partition")) { + // If 'partition' is present, this must be a module interface unit. + if (MDK != Sema::ModuleDeclKind::Module) + Diag(Tok.getLocation(), diag::err_module_implementation_partition) + << FixItHint::CreateInsertion(ModuleLoc, "export "); + MDK = Sema::ModuleDeclKind::Partition; ConsumeToken(); } @@ -2080,14 +2069,14 @@ Parser::DeclGroupPtrTy Parser::ParseModuleDecl() { if (ParseModuleName(ModuleLoc, Path, /*IsImport*/false)) return nullptr; + // We don't support any module attributes yet; just parse them and diagnose. ParsedAttributesWithRange Attrs(AttrFactory); MaybeParseCXX11Attributes(Attrs); - // We don't support any module attributes yet. ProhibitCXX11Attributes(Attrs, diag::err_attribute_not_module_attr); ExpectAndConsumeSemi(diag::err_module_expected_semi); - return Actions.ActOnModuleDecl(ModuleLoc, MDK, Path); + return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path); } /// Parse a module import declaration. This is essentially the same for diff --git a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp index 950f04088822..94e792c1d17d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp @@ -1161,10 +1161,14 @@ void Sema::PushFunctionScope() { // memory for a new scope. FunctionScopes.back()->Clear(); FunctionScopes.push_back(FunctionScopes.back()); + if (LangOpts.OpenMP) + pushOpenMPFunctionRegion(); return; } FunctionScopes.push_back(new FunctionScopeInfo(getDiagnostics())); + if (LangOpts.OpenMP) + pushOpenMPFunctionRegion(); } void Sema::PushBlockScope(Scope *BlockScope, BlockDecl *Block) { @@ -1192,6 +1196,9 @@ void Sema::PopFunctionScopeInfo(const AnalysisBasedWarnings::Policy *WP, FunctionScopeInfo *Scope = FunctionScopes.pop_back_val(); assert(!FunctionScopes.empty() && "mismatched push/pop!"); + if (LangOpts.OpenMP) + popOpenMPFunctionRegion(Scope); + // Issue any analysis-based warnings. if (WP && D) AnalysisWarnings.IssueWarnings(*WP, Scope, D, blkExpr); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp index 45523b30ef22..044ec74679d5 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp @@ -408,7 +408,7 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) { } // Third argument is always an ndrange_t type. 
- if (Arg2->getType().getAsString() != "ndrange_t") { + if (Arg2->getType().getUnqualifiedType().getAsString() != "ndrange_t") { S.Diag(TheCall->getArg(2)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) << "'ndrange_t'"; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index f3ffcf5d696c..f838c9a4877d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -615,7 +615,7 @@ bool Sema::isMicrosoftMissingTypename(const CXXScopeSpec *SS, Scope *S) { CXXRecordDecl *RD = cast(CurContext); for (const auto &Base : RD->bases()) - if (Context.hasSameUnqualifiedType(QualType(Ty, 1), Base.getType())) + if (Ty && Context.hasSameUnqualifiedType(QualType(Ty, 1), Base.getType())) return true; return S->isFunctionPrototypeScope(); } @@ -13523,7 +13523,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // If this is an undefined enum, warn. if (TUK != TUK_Definition && !Invalid) { TagDecl *Def; - if ((getLangOpts().CPlusPlus11 || getLangOpts().ObjC2) && + if (!EnumUnderlyingIsImplicit && + (getLangOpts().CPlusPlus11 || getLangOpts().ObjC2) && cast(New)->isFixed()) { // C++0x: 7.2p2: opaque-enum-declaration. // Conflicts are diagnosed above. Do nothing. @@ -15687,30 +15688,41 @@ static void checkModuleImportContext(Sema &S, Module *M, } } -Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation ModuleLoc, +Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc, + SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path) { - // 'module implementation' requires that we are not compiling a module of any - // kind. 'module' and 'module partition' require that we are compiling a - // module inteface (not a module map). - auto CMK = getLangOpts().getCompilingModule(); - if (MDK == ModuleDeclKind::Implementation - ? CMK != LangOptions::CMK_None - : CMK != LangOptions::CMK_ModuleInterface) { + // A module implementation unit requires that we are not compiling a module + // of any kind. A module interface unit requires that we are not compiling a + // module map. + switch (getLangOpts().getCompilingModule()) { + case LangOptions::CMK_None: + // It's OK to compile a module interface as a normal translation unit. + break; + + case LangOptions::CMK_ModuleInterface: + if (MDK != ModuleDeclKind::Implementation) + break; + + // We were asked to compile a module interface unit but this is a module + // implementation unit. That indicates the 'export' is missing. Diag(ModuleLoc, diag::err_module_interface_implementation_mismatch) - << (unsigned)MDK; + << FixItHint::CreateInsertion(ModuleLoc, "export "); + break; + + case LangOptions::CMK_ModuleMap: + Diag(ModuleLoc, diag::err_module_decl_in_module_map_module); return nullptr; } // FIXME: Create a ModuleDecl and return it. // FIXME: Most of this work should be done by the preprocessor rather than - // here, in case we look ahead across something where the current - // module matters (eg a #include). + // here, in order to support macro import. - // The dots in a module name in the Modules TS are a lie. Unlike Clang's - // hierarchical module map modules, the dots here are just another character - // that can appear in a module name. Flatten down to the actual module name. + // Flatten the dots in a module name. Unlike Clang's hierarchical module map + // modules, the dots here are just another character that can appear in a + // module name. 
std::string ModuleName; for (auto &Piece : Path) { if (!ModuleName.empty()) @@ -15735,8 +15747,8 @@ Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation ModuleLoc, case ModuleDeclKind::Module: { // FIXME: Check we're not in a submodule. - // We can't have imported a definition of this module or parsed a module - // map defining it already. + // We can't have parsed or imported a definition of this module or parsed a + // module map defining it already. if (auto *M = Map.findModule(ModuleName)) { Diag(Path[0].second, diag::err_module_redefinition) << ModuleName; if (M->DefinitionLoc.isValid()) @@ -15910,6 +15922,12 @@ Decl *Sema::ActOnStartExportDecl(Scope *S, SourceLocation ExportLoc, ExportDecl *D = ExportDecl::Create(Context, CurContext, ExportLoc); // C++ Modules TS draft: + // An export-declaration shall appear in the purview of a module other than + // the global module. + if (ModuleScopes.empty() || !ModuleScopes.back().Module || + ModuleScopes.back().Module->Kind != Module::ModuleInterfaceUnit) + Diag(ExportLoc, diag::err_export_not_in_module_interface); + // An export-declaration [...] shall not contain more than one // export keyword. // diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp index 027b3fe0e782..17c6975ca5e9 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -7220,6 +7220,8 @@ void Sema::DiagnoseUnguardedAvailabilityViolations(Decl *D) { Body = FD->getBody(); } else if (auto *MD = dyn_cast(D)) Body = MD->getBody(); + else if (auto *BD = dyn_cast(D)) + Body = BD->getBody(); assert(Body && "Need a body here!"); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp index 5a56f7093777..00480f821fc6 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp @@ -171,9 +171,14 @@ DiagnoseAvailabilityOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc, if (AvailabilityResult Result = S.ShouldDiagnoseAvailabilityOfDecl(D, &Message)) { - if (Result == AR_NotYetIntroduced && S.getCurFunctionOrMethodDecl()) { - S.getEnclosingFunction()->HasPotentialAvailabilityViolations = true; - return; + if (Result == AR_NotYetIntroduced) { + if (S.getCurFunctionOrMethodDecl()) { + S.getEnclosingFunction()->HasPotentialAvailabilityViolations = true; + return; + } else if (S.getCurBlock() || S.getCurLambda()) { + S.getCurFunction()->HasPotentialAvailabilityViolations = true; + return; + } } const ObjCPropertyDecl *ObjCPDecl = nullptr; @@ -12498,6 +12503,9 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, BSI->TheDecl->setBody(cast(Body)); + if (Body && getCurFunction()->HasPotentialAvailabilityViolations) + DiagnoseUnguardedAvailabilityViolations(BSI->TheDecl); + // Try to apply the named return value optimization. We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. 
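For context, the Modules TS surface syntax that the ParseTopLevelDecl/ParseModuleDecl and Sema::ActOnModuleDecl changes above accept looks roughly like the sketch below. The file names are illustrative and the diagnostics are paraphrased from the new err_module_implementation_partition and err_export_not_in_module_interface cases, not quoted from the patch.

// foo.cppm -- a module interface unit; 'export module' is now parsed via the
// kw_export / kw_module cases added to ParseTopLevelDecl above.
export module foo.bar;          // dots are just flattened into the name "foo.bar"
export int f();                 // OK: export-declaration inside a module interface

// foo.impl.cpp -- a module implementation unit (ModuleDeclKind::Implementation).
module foo.bar;
int f() { return 42; }

// foo.part.cppm -- a partition must be an interface unit; writing
// "module partition foo.part;" without 'export' now produces a diagnostic
// with a fix-it that inserts "export " before 'module'.
export module partition foo.part;

// plain.cpp -- not a module interface unit, so this hits the new
// "export-declaration only allowed in a module interface" error.
export int g();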
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp index e4d420f36832..ce76e14982db 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp @@ -1326,12 +1326,6 @@ bool Sema::CppLookupName(LookupResult &R, Scope *S) { return !R.empty(); } -/// \brief Find the declaration that a class temploid member specialization was -/// instantiated from, or the member itself if it is an explicit specialization. -static Decl *getInstantiatedFrom(Decl *D, MemberSpecializationInfo *MSInfo) { - return MSInfo->isExplicitSpecialization() ? D : MSInfo->getInstantiatedFrom(); -} - Module *Sema::getOwningModule(Decl *Entity) { // If it's imported, grab its owning module. Module *M = Entity->getImportedOwningModule(); @@ -1413,12 +1407,11 @@ static Module *getDefiningModule(Sema &S, Decl *Entity) { if (CXXRecordDecl *Pattern = RD->getTemplateInstantiationPattern()) Entity = Pattern; } else if (EnumDecl *ED = dyn_cast(Entity)) { - if (MemberSpecializationInfo *MSInfo = ED->getMemberSpecializationInfo()) - Entity = getInstantiatedFrom(ED, MSInfo); + if (auto *Pattern = ED->getTemplateInstantiationPattern()) + Entity = Pattern; } else if (VarDecl *VD = dyn_cast(Entity)) { - // FIXME: Map from variable template specializations back to the template. - if (MemberSpecializationInfo *MSInfo = VD->getMemberSpecializationInfo()) - Entity = getInstantiatedFrom(VD, MSInfo); + if (VarDecl *Pattern = VD->getTemplateInstantiationPattern()) + Entity = Pattern; } // Walk up to the containing context. That might also have been instantiated diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index d7a6d2e5885e..fb13669407fc 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -118,7 +118,9 @@ class DSAStackTy final { typedef SmallVector StackTy; /// \brief Stack of used declaration and their data-sharing attributes. - StackTy Stack; + DeclSAMapTy Threadprivates; + const FunctionScopeInfo *CurrentNonCapturingFunctionScope = nullptr; + SmallVector, 4> Stack; /// \brief true, if check for DSA must be from parent directive, false, if /// from current directive. OpenMPClauseKind ClauseKindMode = OMPC_unknown; @@ -133,8 +135,14 @@ class DSAStackTy final { /// \brief Checks if the variable is a local for OpenMP region. 
bool isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter); + bool isStackEmpty() const { + return Stack.empty() || + Stack.back().second != CurrentNonCapturingFunctionScope || + Stack.back().first.empty(); + } + public: - explicit DSAStackTy(Sema &S) : Stack(1), SemaRef(S) {} + explicit DSAStackTy(Sema &S) : SemaRef(S) {} bool isClauseParsingMode() const { return ClauseKindMode != OMPC_unknown; } void setClauseParsingMode(OpenMPClauseKind K) { ClauseKindMode = K; } @@ -144,13 +152,38 @@ class DSAStackTy final { void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName, Scope *CurScope, SourceLocation Loc) { - Stack.push_back(SharingMapTy(DKind, DirName, CurScope, Loc)); - Stack.back().DefaultAttrLoc = Loc; + if (Stack.empty() || + Stack.back().second != CurrentNonCapturingFunctionScope) + Stack.emplace_back(StackTy(), CurrentNonCapturingFunctionScope); + Stack.back().first.emplace_back(DKind, DirName, CurScope, Loc); + Stack.back().first.back().DefaultAttrLoc = Loc; } void pop() { - assert(Stack.size() > 1 && "Data-sharing attributes stack is empty!"); - Stack.pop_back(); + assert(!Stack.back().first.empty() && + "Data-sharing attributes stack is empty!"); + Stack.back().first.pop_back(); + } + + /// Start new OpenMP region stack in new non-capturing function. + void pushFunction() { + const FunctionScopeInfo *CurFnScope = SemaRef.getCurFunction(); + assert(!isa(CurFnScope)); + CurrentNonCapturingFunctionScope = CurFnScope; + } + /// Pop region stack for non-capturing function. + void popFunction(const FunctionScopeInfo *OldFSI) { + if (!Stack.empty() && Stack.back().second == OldFSI) { + assert(Stack.back().first.empty()); + Stack.pop_back(); + } + CurrentNonCapturingFunctionScope = nullptr; + for (const FunctionScopeInfo *FSI : llvm::reverse(SemaRef.FunctionScopes)) { + if (!isa(FSI)) { + CurrentNonCapturingFunctionScope = FSI; + break; + } + } } void addCriticalWithHint(OMPCriticalDirective *D, llvm::APSInt Hint) { @@ -229,31 +262,35 @@ class DSAStackTy final { /// \brief Returns currently analyzed directive. OpenMPDirectiveKind getCurrentDirective() const { - return Stack.back().Directive; + return isStackEmpty() ? OMPD_unknown : Stack.back().first.back().Directive; } /// \brief Returns parent directive. OpenMPDirectiveKind getParentDirective() const { - if (Stack.size() > 2) - return Stack[Stack.size() - 2].Directive; - return OMPD_unknown; + if (isStackEmpty() || Stack.back().first.size() == 1) + return OMPD_unknown; + return std::next(Stack.back().first.rbegin())->Directive; } /// \brief Set default data sharing attribute to none. void setDefaultDSANone(SourceLocation Loc) { - Stack.back().DefaultAttr = DSA_none; - Stack.back().DefaultAttrLoc = Loc; + assert(!isStackEmpty()); + Stack.back().first.back().DefaultAttr = DSA_none; + Stack.back().first.back().DefaultAttrLoc = Loc; } /// \brief Set default data sharing attribute to shared. void setDefaultDSAShared(SourceLocation Loc) { - Stack.back().DefaultAttr = DSA_shared; - Stack.back().DefaultAttrLoc = Loc; + assert(!isStackEmpty()); + Stack.back().first.back().DefaultAttr = DSA_shared; + Stack.back().first.back().DefaultAttrLoc = Loc; } DefaultDataSharingAttributes getDefaultDSA() const { - return Stack.back().DefaultAttr; + return isStackEmpty() ? DSA_unspecified + : Stack.back().first.back().DefaultAttr; } SourceLocation getDefaultDSALocation() const { - return Stack.back().DefaultAttrLoc; + return isStackEmpty() ? 
SourceLocation() + : Stack.back().first.back().DefaultAttrLoc; } /// \brief Checks if the specified variable is a threadprivate. @@ -264,52 +301,64 @@ class DSAStackTy final { /// \brief Marks current region as ordered (it has an 'ordered' clause). void setOrderedRegion(bool IsOrdered, Expr *Param) { - Stack.back().OrderedRegion.setInt(IsOrdered); - Stack.back().OrderedRegion.setPointer(Param); + assert(!isStackEmpty()); + Stack.back().first.back().OrderedRegion.setInt(IsOrdered); + Stack.back().first.back().OrderedRegion.setPointer(Param); } /// \brief Returns true, if parent region is ordered (has associated /// 'ordered' clause), false - otherwise. bool isParentOrderedRegion() const { - if (Stack.size() > 2) - return Stack[Stack.size() - 2].OrderedRegion.getInt(); - return false; + if (isStackEmpty() || Stack.back().first.size() == 1) + return false; + return std::next(Stack.back().first.rbegin())->OrderedRegion.getInt(); } /// \brief Returns optional parameter for the ordered region. Expr *getParentOrderedRegionParam() const { - if (Stack.size() > 2) - return Stack[Stack.size() - 2].OrderedRegion.getPointer(); - return nullptr; + if (isStackEmpty() || Stack.back().first.size() == 1) + return nullptr; + return std::next(Stack.back().first.rbegin())->OrderedRegion.getPointer(); } /// \brief Marks current region as nowait (it has a 'nowait' clause). void setNowaitRegion(bool IsNowait = true) { - Stack.back().NowaitRegion = IsNowait; + assert(!isStackEmpty()); + Stack.back().first.back().NowaitRegion = IsNowait; } /// \brief Returns true, if parent region is nowait (has associated /// 'nowait' clause), false - otherwise. bool isParentNowaitRegion() const { - if (Stack.size() > 2) - return Stack[Stack.size() - 2].NowaitRegion; - return false; + if (isStackEmpty() || Stack.back().first.size() == 1) + return false; + return std::next(Stack.back().first.rbegin())->NowaitRegion; } /// \brief Marks parent region as cancel region. void setParentCancelRegion(bool Cancel = true) { - if (Stack.size() > 2) - Stack[Stack.size() - 2].CancelRegion = - Stack[Stack.size() - 2].CancelRegion || Cancel; + if (!isStackEmpty() && Stack.back().first.size() > 1) { + auto &StackElemRef = *std::next(Stack.back().first.rbegin()); + StackElemRef.CancelRegion |= StackElemRef.CancelRegion || Cancel; + } } /// \brief Return true if current region has inner cancel construct. - bool isCancelRegion() const { return Stack.back().CancelRegion; } + bool isCancelRegion() const { + return isStackEmpty() ? false : Stack.back().first.back().CancelRegion; + } /// \brief Set collapse value for the region. - void setAssociatedLoops(unsigned Val) { Stack.back().AssociatedLoops = Val; } + void setAssociatedLoops(unsigned Val) { + assert(!isStackEmpty()); + Stack.back().first.back().AssociatedLoops = Val; + } /// \brief Return collapse value for region. - unsigned getAssociatedLoops() const { return Stack.back().AssociatedLoops; } + unsigned getAssociatedLoops() const { + return isStackEmpty() ? 0 : Stack.back().first.back().AssociatedLoops; + } /// \brief Marks current target region as one with closely nested teams /// region. void setParentTeamsRegionLoc(SourceLocation TeamsRegionLoc) { - if (Stack.size() > 2) - Stack[Stack.size() - 2].InnerTeamsRegionLoc = TeamsRegionLoc; + if (!isStackEmpty() && Stack.back().first.size() > 1) { + std::next(Stack.back().first.rbegin())->InnerTeamsRegionLoc = + TeamsRegionLoc; + } } /// \brief Returns true, if current region has closely nested teams region. 
bool hasInnerTeamsRegion() const { @@ -317,14 +366,20 @@ class DSAStackTy final { } /// \brief Returns location of the nested teams region (if any). SourceLocation getInnerTeamsRegionLoc() const { - if (Stack.size() > 1) - return Stack.back().InnerTeamsRegionLoc; - return SourceLocation(); + return isStackEmpty() ? SourceLocation() + : Stack.back().first.back().InnerTeamsRegionLoc; } - Scope *getCurScope() const { return Stack.back().CurScope; } - Scope *getCurScope() { return Stack.back().CurScope; } - SourceLocation getConstructLoc() { return Stack.back().ConstructLoc; } + Scope *getCurScope() const { + return isStackEmpty() ? nullptr : Stack.back().first.back().CurScope; + } + Scope *getCurScope() { + return isStackEmpty() ? nullptr : Stack.back().first.back().CurScope; + } + SourceLocation getConstructLoc() { + return isStackEmpty() ? SourceLocation() + : Stack.back().first.back().ConstructLoc; + } /// Do the check specified in \a Check to all component lists and return true /// if any issue is found. @@ -333,8 +388,10 @@ class DSAStackTy final { const llvm::function_ref< bool(OMPClauseMappableExprCommon::MappableExprComponentListRef, OpenMPClauseKind)> &Check) { - auto SI = Stack.rbegin(); - auto SE = Stack.rend(); + if (isStackEmpty()) + return false; + auto SI = Stack.back().first.rbegin(); + auto SE = Stack.back().first.rend(); if (SI == SE) return false; @@ -361,9 +418,9 @@ class DSAStackTy final { ValueDecl *VD, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPClauseKind WhereFoundClauseKind) { - assert(Stack.size() > 1 && + assert(!isStackEmpty() && "Not expecting to retrieve components from a empty stack!"); - auto &MEC = Stack.back().MappedExprComponents[VD]; + auto &MEC = Stack.back().first.back().MappedExprComponents[VD]; // Create new entry and append the new components there. 
MEC.Components.resize(MEC.Components.size() + 1); MEC.Components.back().append(Components.begin(), Components.end()); @@ -371,23 +428,25 @@ class DSAStackTy final { } unsigned getNestingLevel() const { - assert(Stack.size() > 1); - return Stack.size() - 2; + assert(!isStackEmpty()); + return Stack.back().first.size() - 1; } void addDoacrossDependClause(OMPDependClause *C, OperatorOffsetTy &OpsOffs) { - assert(Stack.size() > 2); - assert(isOpenMPWorksharingDirective(Stack[Stack.size() - 2].Directive)); - Stack[Stack.size() - 2].DoacrossDepends.insert({C, OpsOffs}); + assert(!isStackEmpty() && Stack.back().first.size() > 1); + auto &StackElem = *std::next(Stack.back().first.rbegin()); + assert(isOpenMPWorksharingDirective(StackElem.Directive)); + StackElem.DoacrossDepends.insert({C, OpsOffs}); } llvm::iterator_range getDoacrossDependClauses() const { - assert(Stack.size() > 1); - if (isOpenMPWorksharingDirective(Stack[Stack.size() - 1].Directive)) { - auto &Ref = Stack[Stack.size() - 1].DoacrossDepends; + assert(!isStackEmpty()); + auto &StackElem = Stack.back().first.back(); + if (isOpenMPWorksharingDirective(StackElem.Directive)) { + auto &Ref = StackElem.DoacrossDepends; return llvm::make_range(Ref.begin(), Ref.end()); } - return llvm::make_range(Stack[0].DoacrossDepends.end(), - Stack[0].DoacrossDepends.end()); + return llvm::make_range(StackElem.DoacrossDepends.end(), + StackElem.DoacrossDepends.end()); } }; bool isParallelOrTaskRegion(OpenMPDirectiveKind DKind) { @@ -416,7 +475,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator &Iter, auto *VD = dyn_cast(D); auto *FD = dyn_cast(D); DSAVarData DVar; - if (Iter == std::prev(Stack.rend())) { + if (isStackEmpty() || Iter == Stack.back().first.rend()) { // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced // in a region but not in construct] // File-scope or namespace-scope variables referenced in called routines @@ -490,8 +549,9 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator &Iter, // bound to the current team is shared. if (isOpenMPTaskingDirective(DVar.DKind)) { DSAVarData DVarTemp; - for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend(); - I != EE; ++I) { + auto I = Iter, E = Stack.back().first.rend(); + do { + ++I; // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables // Referenced in a Construct, implicitly determined, p.6] // In a task construct, if no default clause is present, a variable @@ -503,9 +563,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator &Iter, DVar.CKind = OMPC_firstprivate; return DVar; } - if (isParallelOrTaskRegion(I->Directive)) - break; - } + } while (I != E && !isParallelOrTaskRegion(I->Directive)); DVar.CKind = (DVarTemp.CKind == OMPC_unknown) ? 
OMPC_firstprivate : OMPC_shared; return DVar; @@ -520,12 +578,13 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator &Iter, } Expr *DSAStackTy::addUniqueAligned(ValueDecl *D, Expr *NewDE) { - assert(Stack.size() > 1 && "Data sharing attributes stack is empty"); + assert(!isStackEmpty() && "Data sharing attributes stack is empty"); D = getCanonicalDecl(D); - auto It = Stack.back().AlignedMap.find(D); - if (It == Stack.back().AlignedMap.end()) { + auto &StackElem = Stack.back().first.back(); + auto It = StackElem.AlignedMap.find(D); + if (It == StackElem.AlignedMap.end()) { assert(NewDE && "Unexpected nullptr expr to be added into aligned map"); - Stack.back().AlignedMap[D] = NewDE; + StackElem.AlignedMap[D] = NewDE; return nullptr; } else { assert(It->second && "Unexpected nullptr expr in the aligned map"); @@ -535,35 +594,43 @@ Expr *DSAStackTy::addUniqueAligned(ValueDecl *D, Expr *NewDE) { } void DSAStackTy::addLoopControlVariable(ValueDecl *D, VarDecl *Capture) { - assert(Stack.size() > 1 && "Data-sharing attributes stack is empty"); + assert(!isStackEmpty() && "Data-sharing attributes stack is empty"); D = getCanonicalDecl(D); - Stack.back().LCVMap.insert( - std::make_pair(D, LCDeclInfo(Stack.back().LCVMap.size() + 1, Capture))); + auto &StackElem = Stack.back().first.back(); + StackElem.LCVMap.insert( + {D, LCDeclInfo(StackElem.LCVMap.size() + 1, Capture)}); } DSAStackTy::LCDeclInfo DSAStackTy::isLoopControlVariable(ValueDecl *D) { - assert(Stack.size() > 1 && "Data-sharing attributes stack is empty"); + assert(!isStackEmpty() && "Data-sharing attributes stack is empty"); D = getCanonicalDecl(D); - return Stack.back().LCVMap.count(D) > 0 ? Stack.back().LCVMap[D] - : LCDeclInfo(0, nullptr); + auto &StackElem = Stack.back().first.back(); + auto It = StackElem.LCVMap.find(D); + if (It != StackElem.LCVMap.end()) + return It->second; + return {0, nullptr}; } DSAStackTy::LCDeclInfo DSAStackTy::isParentLoopControlVariable(ValueDecl *D) { - assert(Stack.size() > 2 && "Data-sharing attributes stack is empty"); + assert(!isStackEmpty() && Stack.back().first.size() > 1 && + "Data-sharing attributes stack is empty"); D = getCanonicalDecl(D); - return Stack[Stack.size() - 2].LCVMap.count(D) > 0 - ? 
Stack[Stack.size() - 2].LCVMap[D] - : LCDeclInfo(0, nullptr); + auto &StackElem = *std::next(Stack.back().first.rbegin()); + auto It = StackElem.LCVMap.find(D); + if (It != StackElem.LCVMap.end()) + return It->second; + return {0, nullptr}; } ValueDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) { - assert(Stack.size() > 2 && "Data-sharing attributes stack is empty"); - if (Stack[Stack.size() - 2].LCVMap.size() < I) + assert(!isStackEmpty() && Stack.back().first.size() > 1 && + "Data-sharing attributes stack is empty"); + auto &StackElem = *std::next(Stack.back().first.rbegin()); + if (StackElem.LCVMap.size() < I) return nullptr; - for (auto &Pair : Stack[Stack.size() - 2].LCVMap) { + for (auto &Pair : StackElem.LCVMap) if (Pair.second.first == I) return Pair.first; - } return nullptr; } @@ -571,13 +638,13 @@ void DSAStackTy::addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A, DeclRefExpr *PrivateCopy) { D = getCanonicalDecl(D); if (A == OMPC_threadprivate) { - auto &Data = Stack[0].SharingMap[D]; + auto &Data = Threadprivates[D]; Data.Attributes = A; Data.RefExpr.setPointer(E); Data.PrivateCopy = nullptr; } else { - assert(Stack.size() > 1 && "Data-sharing attributes stack is empty"); - auto &Data = Stack.back().SharingMap[D]; + assert(!isStackEmpty() && "Data-sharing attributes stack is empty"); + auto &Data = Stack.back().first.back().SharingMap[D]; assert(Data.Attributes == OMPC_unknown || (A == Data.Attributes) || (A == OMPC_firstprivate && Data.Attributes == OMPC_lastprivate) || (A == OMPC_lastprivate && Data.Attributes == OMPC_firstprivate) || @@ -592,7 +659,7 @@ void DSAStackTy::addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A, Data.RefExpr.setPointerAndInt(E, IsLastprivate); Data.PrivateCopy = PrivateCopy; if (PrivateCopy) { - auto &Data = Stack.back().SharingMap[PrivateCopy->getDecl()]; + auto &Data = Stack.back().first.back().SharingMap[PrivateCopy->getDecl()]; Data.Attributes = A; Data.RefExpr.setPointerAndInt(PrivateCopy, IsLastprivate); Data.PrivateCopy = nullptr; @@ -602,19 +669,17 @@ void DSAStackTy::addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A, bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) { D = D->getCanonicalDecl(); - if (Stack.size() > 2) { - reverse_iterator I = Iter, E = std::prev(Stack.rend()); + if (!isStackEmpty() && Stack.back().first.size() > 1) { + reverse_iterator I = Iter, E = Stack.back().first.rend(); Scope *TopScope = nullptr; - while (I != E && !isParallelOrTaskRegion(I->Directive)) { + while (I != E && !isParallelOrTaskRegion(I->Directive)) ++I; - } if (I == E) return false; TopScope = I->CurScope ? I->CurScope->getParent() : nullptr; Scope *CurScope = getCurScope(); - while (CurScope != TopScope && !CurScope->isDeclScope(D)) { + while (CurScope != TopScope && !CurScope->isDeclScope(D)) CurScope = CurScope->getParent(); - } return CurScope != TopScope; } return false; @@ -665,16 +730,16 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) { D->getLocation()), OMPC_threadprivate); } - if (Stack[0].SharingMap.count(D)) { - DVar.RefExpr = Stack[0].SharingMap[D].RefExpr.getPointer(); + auto TI = Threadprivates.find(D); + if (TI != Threadprivates.end()) { + DVar.RefExpr = TI->getSecond().RefExpr.getPointer(); DVar.CKind = OMPC_threadprivate; return DVar; } - if (Stack.size() == 1) { + if (isStackEmpty()) // Not in OpenMP execution region and top scope was already checked. 
return DVar; - } // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced // in a Construct, C/C++, predetermined, p.4] @@ -722,11 +787,10 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) { // Explicitly specified attributes and local variables with predetermined // attributes. - auto StartI = std::next(Stack.rbegin()); - auto EndI = std::prev(Stack.rend()); - if (FromParent && StartI != EndI) { + auto StartI = std::next(Stack.back().first.rbegin()); + auto EndI = Stack.back().first.rend(); + if (FromParent && StartI != EndI) StartI = std::next(StartI); - } auto I = std::prev(StartI); if (I->SharingMap.count(D)) { DVar.RefExpr = I->SharingMap[D].RefExpr.getPointer(); @@ -740,12 +804,15 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) { DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D, bool FromParent) { - D = getCanonicalDecl(D); - auto StartI = Stack.rbegin(); - auto EndI = std::prev(Stack.rend()); - if (FromParent && StartI != EndI) { - StartI = std::next(StartI); + if (isStackEmpty()) { + StackTy::reverse_iterator I; + return getDSA(I, D); } + D = getCanonicalDecl(D); + auto StartI = Stack.back().first.rbegin(); + auto EndI = Stack.back().first.rend(); + if (FromParent && StartI != EndI) + StartI = std::next(StartI); return getDSA(StartI, D); } @@ -754,33 +821,40 @@ DSAStackTy::hasDSA(ValueDecl *D, const llvm::function_ref &CPred, const llvm::function_ref &DPred, bool FromParent) { + if (isStackEmpty()) + return {}; D = getCanonicalDecl(D); - auto StartI = std::next(Stack.rbegin()); - auto EndI = Stack.rend(); - if (FromParent && StartI != EndI) { + auto StartI = std::next(Stack.back().first.rbegin()); + auto EndI = Stack.back().first.rend(); + if (FromParent && StartI != EndI) StartI = std::next(StartI); - } - for (auto I = StartI, EE = EndI; I != EE; ++I) { + if (StartI == EndI) + return {}; + auto I = std::prev(StartI); + do { + ++I; if (!DPred(I->Directive) && !isParallelOrTaskRegion(I->Directive)) continue; DSAVarData DVar = getDSA(I, D); if (CPred(DVar.CKind)) return DVar; - } - return DSAVarData(); + } while (I != EndI); + return {}; } DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA( ValueDecl *D, const llvm::function_ref &CPred, const llvm::function_ref &DPred, bool FromParent) { + if (isStackEmpty()) + return {}; D = getCanonicalDecl(D); - auto StartI = std::next(Stack.rbegin()); - auto EndI = Stack.rend(); + auto StartI = std::next(Stack.back().first.rbegin()); + auto EndI = Stack.back().first.rend(); if (FromParent && StartI != EndI) StartI = std::next(StartI); if (StartI == EndI || !DPred(StartI->Directive)) - return DSAVarData(); + return {}; DSAVarData DVar = getDSA(StartI, D); return CPred(DVar.CKind) ? 
DVar : DSAVarData(); } @@ -790,9 +864,11 @@ bool DSAStackTy::hasExplicitDSA( unsigned Level, bool NotLastprivate) { if (CPred(ClauseKindMode)) return true; + if (isStackEmpty()) + return false; D = getCanonicalDecl(D); - auto StartI = std::next(Stack.begin()); - auto EndI = Stack.end(); + auto StartI = Stack.back().first.begin(); + auto EndI = Stack.back().first.end(); if (std::distance(StartI, EndI) <= (int)Level) return false; std::advance(StartI, Level); @@ -805,8 +881,10 @@ bool DSAStackTy::hasExplicitDSA( bool DSAStackTy::hasExplicitDirective( const llvm::function_ref &DPred, unsigned Level) { - auto StartI = std::next(Stack.begin()); - auto EndI = Stack.end(); + if (isStackEmpty()) + return false; + auto StartI = Stack.back().first.begin(); + auto EndI = Stack.back().first.end(); if (std::distance(StartI, EndI) <= (int)Level) return false; std::advance(StartI, Level); @@ -819,13 +897,12 @@ bool DSAStackTy::hasDirective( &DPred, bool FromParent) { // We look only in the enclosing region. - if (Stack.size() < 2) + if (isStackEmpty()) return false; - auto StartI = std::next(Stack.rbegin()); - auto EndI = std::prev(Stack.rend()); - if (FromParent && StartI != EndI) { + auto StartI = std::next(Stack.back().first.rbegin()); + auto EndI = Stack.back().first.rend(); + if (FromParent && StartI != EndI) StartI = std::next(StartI); - } for (auto I = StartI, EE = EndI; I != EE; ++I) { if (DPred(I->Directive, I->DirectiveName, I->ConstructLoc)) return true; @@ -839,6 +916,14 @@ void Sema::InitDataSharingAttributesStack() { #define DSAStack static_cast(VarDataSharingAttributesStack) +void Sema::pushOpenMPFunctionRegion() { + DSAStack->pushFunction(); +} + +void Sema::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) { + DSAStack->popFunction(OldFSI); +} + bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index 693b5c088acc..f4a97f55789e 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -2220,6 +2220,9 @@ class TreeTransform { Base = BaseResult.get(); QualType BaseType = Base->getType(); + if (isArrow && !BaseType->isPointerType()) + return ExprError(); + // FIXME: this involves duplicating earlier analysis in a lot of // cases; we should avoid this when possible. 
LookupResult R(getSema(), MemberNameInfo, Sema::LookupMemberName); diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp index 406c4b50d92b..7b1edc062d1e 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp @@ -4779,6 +4779,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); bool First = true; Module *CurrentModule = nullptr; + Module::ModuleKind ModuleKind = Module::ModuleMapModule; RecordData Record; while (true) { llvm::BitstreamEntry Entry = F.Stream.advanceSkippingSubblocks(); @@ -4871,6 +4872,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { CurrentModule->setASTFile(F.File); } + CurrentModule->Kind = ModuleKind; CurrentModule->Signature = F.Signature; CurrentModule->IsFromModuleFile = true; CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem; @@ -4969,6 +4971,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules); } + ModuleKind = (Module::ModuleKind)Record[2]; break; } diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp index bb869077f4cd..84d2420f4ba9 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp @@ -2694,9 +2694,10 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the submodule metadata block. - RecordData::value_type Record[] = {getNumberOfModules(WritingModule), - FirstSubmoduleID - - NUM_PREDEF_SUBMODULE_IDS}; + RecordData::value_type Record[] = { + getNumberOfModules(WritingModule), + FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS, + (unsigned)WritingModule->Kind}; Stream.EmitRecord(SUBMODULE_METADATA, Record); // Write all of the submodules. 
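The SemaOpenMP rework above replaces the single directive stack with one stack per enclosing non-capturing function, which is why the accessors now go through isStackEmpty() and Stack.back().first. The following is a minimal, self-contained model of that shape (simplified names, not the real DSAStackTy); it only illustrates the push/pop discipline that the new pushOpenMPFunctionRegion/popOpenMPFunctionRegion hooks rely on.

#include <cassert>
#include <utility>
#include <vector>

// Simplified model: the outer vector keeps one element per enclosing
// non-capturing function, pairing that function's directive stack with a tag
// identifying the function scope (FunctionScopeInfo* in the real code).
using FunctionTag = const void *;

class RegionStack {
  std::vector<std::pair<std::vector<int>, FunctionTag>> Stack;
  FunctionTag CurrentFn = nullptr;

public:
  bool isStackEmpty() const {
    return Stack.empty() || Stack.back().second != CurrentFn ||
           Stack.back().first.empty();
  }
  // Called when a new function scope is pushed (pushOpenMPFunctionRegion).
  void pushFunction(FunctionTag Fn) { CurrentFn = Fn; }
  // Called when a function scope is popped; the real code then walks
  // Sema::FunctionScopes to find the next non-capturing scope, modelled
  // here by restoreFunction().
  void popFunction(FunctionTag Fn) {
    if (!Stack.empty() && Stack.back().second == Fn) {
      assert(Stack.back().first.empty() && "open regions at function exit");
      Stack.pop_back();
    }
    CurrentFn = nullptr;
  }
  void restoreFunction(FunctionTag Fn) { CurrentFn = Fn; }
  // Directive entry/exit, as in DSAStackTy::push()/pop().
  void push(int Directive) {
    if (Stack.empty() || Stack.back().second != CurrentFn)
      Stack.emplace_back(std::vector<int>(), CurrentFn);
    Stack.back().first.push_back(Directive);
  }
  void pop() {
    assert(!isStackEmpty() && "no region to pop");
    Stack.back().first.pop_back();
  }
  int currentDirective() const {  // -1 plays the role of OMPD_unknown
    return isStackEmpty() ? -1 : Stack.back().first.back();
  }
};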
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 6e9b7fefa3d0..5730517289bb 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -177,7 +177,10 @@ class MallocChecker : public Checker KernelZeroFlagVal; void initIdentifierInfo(ASTContext &C) const; @@ -321,9 +327,12 @@ class MallocChecker : public Checker MallocChecker::performKernelMalloc( return None; } +SVal MallocChecker::evalMulForBufferSize(CheckerContext &C, const Expr *Blocks, + const Expr *BlockBytes) { + SValBuilder &SB = C.getSValBuilder(); + SVal BlocksVal = C.getSVal(Blocks); + SVal BlockBytesVal = C.getSVal(BlockBytes); + ProgramStateRef State = C.getState(); + SVal TotalSize = SB.evalBinOp(State, BO_Mul, BlocksVal, BlockBytesVal, + SB.getContext().getSizeType()); + return TotalSize; +} + void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { if (C.wasInlined) return; @@ -813,10 +842,10 @@ void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { State = ProcessZeroAllocation(C, CE, 0, State); } else if (FunI == II_realloc || FunI == II_g_realloc || FunI == II_g_try_realloc) { - State = ReallocMem(C, CE, false, State); + State = ReallocMemAux(C, CE, false, State); State = ProcessZeroAllocation(C, CE, 1, State); } else if (FunI == II_reallocf) { - State = ReallocMem(C, CE, true, State); + State = ReallocMemAux(C, CE, true, State); State = ProcessZeroAllocation(C, CE, 1, State); } else if (FunI == II_calloc) { State = CallocMem(C, CE, State); @@ -874,6 +903,25 @@ void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { return; State = MallocMemAux(C, CE, CE->getArg(1), UndefinedVal(), State); State = ProcessZeroAllocation(C, CE, 1, State); + } else if (FunI == II_g_malloc_n || FunI == II_g_try_malloc_n || + FunI == II_g_malloc0_n || FunI == II_g_try_malloc0_n) { + if (CE->getNumArgs() < 2) + return; + SVal Init = UndefinedVal(); + if (FunI == II_g_malloc0_n || FunI == II_g_try_malloc0_n) { + SValBuilder &SB = C.getSValBuilder(); + Init = SB.makeZeroVal(SB.getContext().CharTy); + } + SVal TotalSize = evalMulForBufferSize(C, CE->getArg(0), CE->getArg(1)); + State = MallocMemAux(C, CE, TotalSize, Init, State); + State = ProcessZeroAllocation(C, CE, 0, State); + State = ProcessZeroAllocation(C, CE, 1, State); + } else if (FunI == II_g_realloc_n || FunI == II_g_try_realloc_n) { + if (CE->getNumArgs() < 3) + return; + State = ReallocMemAux(C, CE, false, State, true); + State = ProcessZeroAllocation(C, CE, 1, State); + State = ProcessZeroAllocation(C, CE, 2, State); } } @@ -1976,14 +2024,17 @@ void MallocChecker::ReportUseZeroAllocated(CheckerContext &C, } } -ProgramStateRef MallocChecker::ReallocMem(CheckerContext &C, - const CallExpr *CE, - bool FreesOnFail, - ProgramStateRef State) const { +ProgramStateRef MallocChecker::ReallocMemAux(CheckerContext &C, + const CallExpr *CE, + bool FreesOnFail, + ProgramStateRef State, + bool SuffixWithN) const { if (!State) return nullptr; - if (CE->getNumArgs() < 2) + if (SuffixWithN && CE->getNumArgs() < 3) + return nullptr; + else if (CE->getNumArgs() < 2) return nullptr; const Expr *arg0Expr = CE->getArg(0); @@ -1998,20 +2049,19 @@ ProgramStateRef MallocChecker::ReallocMem(CheckerContext &C, DefinedOrUnknownSVal PtrEQ = svalBuilder.evalEQ(State, arg0Val, svalBuilder.makeNull()); - // Get the size 
argument. If there is no size arg then give up. + // Get the size argument. const Expr *Arg1 = CE->getArg(1); - if (!Arg1) - return nullptr; // Get the value of the size argument. - SVal Arg1ValG = State->getSVal(Arg1, LCtx); - if (!Arg1ValG.getAs()) + SVal TotalSize = State->getSVal(Arg1, LCtx); + if (SuffixWithN) + TotalSize = evalMulForBufferSize(C, Arg1, CE->getArg(2)); + if (!TotalSize.getAs()) return nullptr; - DefinedOrUnknownSVal Arg1Val = Arg1ValG.castAs(); // Compare the size argument to 0. DefinedOrUnknownSVal SizeZero = - svalBuilder.evalEQ(State, Arg1Val, + svalBuilder.evalEQ(State, TotalSize.castAs(), svalBuilder.makeIntValWithPtrWidth(0, false)); ProgramStateRef StatePtrIsNull, StatePtrNotNull; @@ -2025,8 +2075,8 @@ ProgramStateRef MallocChecker::ReallocMem(CheckerContext &C, // If the ptr is NULL and the size is not 0, the call is equivalent to // malloc(size). - if ( PrtIsNull && !SizeIsZero) { - ProgramStateRef stateMalloc = MallocMemAux(C, CE, CE->getArg(1), + if (PrtIsNull && !SizeIsZero) { + ProgramStateRef stateMalloc = MallocMemAux(C, CE, TotalSize, UndefinedVal(), StatePtrIsNull); return stateMalloc; } @@ -2059,7 +2109,7 @@ ProgramStateRef MallocChecker::ReallocMem(CheckerContext &C, if (ProgramStateRef stateFree = FreeMemAux(C, CE, State, 0, false, ReleasedAllocated)) { - ProgramStateRef stateRealloc = MallocMemAux(C, CE, CE->getArg(1), + ProgramStateRef stateRealloc = MallocMemAux(C, CE, TotalSize, UnknownVal(), stateFree); if (!stateRealloc) return nullptr; @@ -2090,12 +2140,8 @@ ProgramStateRef MallocChecker::CallocMem(CheckerContext &C, const CallExpr *CE, return nullptr; SValBuilder &svalBuilder = C.getSValBuilder(); - const LocationContext *LCtx = C.getLocationContext(); - SVal count = State->getSVal(CE->getArg(0), LCtx); - SVal elementSize = State->getSVal(CE->getArg(1), LCtx); - SVal TotalSize = svalBuilder.evalBinOp(State, BO_Mul, count, elementSize, - svalBuilder.getContext().getSizeType()); SVal zeroVal = svalBuilder.makeZeroVal(svalBuilder.getContext().CharTy); + SVal TotalSize = evalMulForBufferSize(C, CE->getArg(0), CE->getArg(1)); return MallocMemAux(C, CE, TotalSize, zeroVal, State); } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 7309741d22e3..d00182a871c1 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -61,7 +61,9 @@ const Expr *bugreporter::getDerefExpr(const Stmt *S) { return U->getSubExpr()->IgnoreParenCasts(); } else if (const MemberExpr *ME = dyn_cast(E)) { - if (ME->isArrow() || isDeclRefExprToReference(ME->getBase())) { + if (ME->isImplicitAccess()) { + return ME; + } else if (ME->isArrow() || isDeclRefExprToReference(ME->getBase())) { return ME->getBase()->IgnoreParenCasts(); } else { // If we have a member expr with a dot, the base must have been @@ -73,9 +75,9 @@ const Expr *bugreporter::getDerefExpr(const Stmt *S) { return IvarRef->getBase()->IgnoreParenCasts(); } else if (const ArraySubscriptExpr *AE = dyn_cast(E)) { - return AE->getBase(); + return getDerefExpr(AE->getBase()); } - else if (isDeclRefExprToReference(E)) { + else if (isa(E)) { return E; } break; @@ -961,7 +963,24 @@ bool bugreporter::trackNullOrUndefValue(const ExplodedNode *N, const Expr *Inner = nullptr; if (const Expr *Ex = dyn_cast(S)) { Ex = Ex->IgnoreParenCasts(); - if (ExplodedGraph::isInterestingLValueExpr(Ex) || 
CallEvent::isCallStmt(Ex)) + + // Performing operator `&' on an lvalue expression is essentially a no-op. + // Then, if we are taking addresses of fields or elements, these are also + // unlikely to matter. + // FIXME: There's a hack in our Store implementation that always computes + // field offsets around null pointers as if they are always equal to 0. + // The idea here is to report accesses to fields as null dereferences + // even though the pointer value that's being dereferenced is actually + // the offset of the field rather than exactly 0. + // See the FIXME in StoreManager's getLValueFieldOrIvar() method. + // This code interacts heavily with this hack; otherwise the value + // would not be null at all for most fields, so we'd be unable to track it. + if (const auto *Op = dyn_cast(Ex)) + if (Op->getOpcode() == UO_AddrOf && Op->getSubExpr()->isLValue()) + if (const Expr *DerefEx = getDerefExpr(Op->getSubExpr())) + Ex = DerefEx; + + if (Ex && (ExplodedGraph::isInterestingLValueExpr(Ex) || CallEvent::isCallStmt(Ex))) Inner = Ex; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 9e6ec09010e9..8ee34190891a 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1904,8 +1904,8 @@ void ExprEngine::processSwitch(SwitchNodeBuilder& builder) { // Evaluate the LHS of the case value. llvm::APSInt V1 = Case->getLHS()->EvaluateKnownConstInt(getContext()); - assert(V1.getBitWidth() == getContext().getTypeSize(CondE->getType())); - + assert(V1.getBitWidth() == getContext().getIntWidth(CondE->getType())); + // Get the RHS of the case, if it exists. llvm::APSInt V2; if (const Expr *E = Case->getRHS()) diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index dd7e9dd11781..3000e13d32c6 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1338,6 +1338,9 @@ RegionStoreManager::getSizeInElements(ProgramStateRef state, /// the array). This is called by ExprEngine when evaluating casts /// from arrays to pointers. SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) { + if (Array.getAs()) + return Array; + if (!Array.getAs()) return UnknownVal(); diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp index ba48a60d5a1c..1af49f68cc05 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -404,9 +404,15 @@ SVal StoreManager::getLValueFieldOrIvar(const Decl *D, SVal Base) { case loc::ConcreteIntKind: // While these seem funny, this can happen through casts. - // FIXME: What we should return is the field offset. For example, - // add the field offset to the integer value. That way funny things + // FIXME: What we should return is the field offset, not base. For example, + // add the field offset to the integer value. That way things // like this work properly: &(((struct foo *) 0xa)->f) + // However, that's not easy to fix without reducing our abilities + // to catch null pointer dereference. Eg., ((struct foo *)0x0)->f = 7 + // is a null dereference even though we're dereferencing offset of f + // rather than null. 
Coming up with an approach that computes offsets + // over null pointers properly while still being able to catch null + // dereferences might be worth it. return Base; default: @@ -431,7 +437,7 @@ SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, // If the base is an unknown or undefined value, just return it back. // FIXME: For absolute pointer addresses, we just return that value back as // well, although in reality we should return the offset added to that - // value. + // value. See also the similar FIXME in getLValueFieldOrIvar(). if (Base.isUnknownOrUndef() || Base.getAs()) return Base; diff --git a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt b/contrib/llvm/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt deleted file mode 100644 index b2f9b4f4c0cd..000000000000 --- a/contrib/llvm/tools/clang/lib/Tooling/Refactoring/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Option - Support - ) - -add_clang_library(clangToolingRefactor - AtomicChange.cpp - - LINK_LIBS - clangBasic - clangToolingCore - ) diff --git a/contrib/llvm/tools/clang/tools/clang-format/ClangFormat.cpp b/contrib/llvm/tools/clang/tools/clang-format/ClangFormat.cpp index ac0d0a8512f4..14bff19a1a0c 100644 --- a/contrib/llvm/tools/clang/tools/clang-format/ClangFormat.cpp +++ b/contrib/llvm/tools/clang/tools/clang-format/ClangFormat.cpp @@ -276,14 +276,17 @@ static bool format(StringRef FileName) { } // Get new affected ranges after sorting `#includes`. Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges); - bool IncompleteFormat = false; + FormattingAttemptStatus Status; Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges, - AssumedFileName, &IncompleteFormat); + AssumedFileName, &Status); Replaces = Replaces.merge(FormatChanges); if (OutputXML) { outs() << "\n\n"; + << (Status.FormatComplete ? "false" : "true") << "'"; + if (!Status.FormatComplete) + outs() << " line=" << Status.Line; + outs() << ">\n"; if (Cursor.getNumOccurrences() != 0) outs() << "" << FormatChanges.getShiftedCodePosition(CursorPosition) @@ -307,11 +310,15 @@ static bool format(StringRef FileName) { if (Rewrite.overwriteChangedFiles()) return true; } else { - if (Cursor.getNumOccurrences() != 0) + if (Cursor.getNumOccurrences() != 0) { outs() << "{ \"Cursor\": " << FormatChanges.getShiftedCodePosition(CursorPosition) << ", \"IncompleteFormat\": " - << (IncompleteFormat ? "true" : "false") << " }\n"; + << (Status.FormatComplete ? 
"false" : "true"); + if (!Status.FormatComplete) + outs() << ", \"Line\": " << Status.Line; + outs() << " }\n"; + } Rewrite.getEditBuffer(ID).write(outs()); } } diff --git a/contrib/llvm/tools/llc/llc.cpp b/contrib/llvm/tools/llc/llc.cpp index 43f97f112f6b..7c81abaed755 100644 --- a/contrib/llvm/tools/llc/llc.cpp +++ b/contrib/llvm/tools/llc/llc.cpp @@ -90,6 +90,11 @@ OptLevel("O", static cl::opt TargetTriple("mtriple", cl::desc("Override target triple for module")); +static cl::opt SplitDwarfFile( + "split-dwarf-file", + cl::desc( + "Specify the name of the .dwo file to encode in the DWARF output")); + static cl::opt NoVerify("disable-verify", cl::Hidden, cl::desc("Do not verify input module")); @@ -450,6 +455,7 @@ static int compileModule(char **argv, LLVMContext &Context) { Options.MCOptions.AsmVerbose = AsmVerbose; Options.MCOptions.PreserveAsmComments = PreserveComments; Options.MCOptions.IASSearchPaths = IncludeDirs; + Options.MCOptions.SplitDwarfFile = SplitDwarfFile; std::unique_ptr Target( TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, FeaturesStr, diff --git a/contrib/llvm/tools/lld/COFF/Chunks.cpp b/contrib/llvm/tools/lld/COFF/Chunks.cpp index 10eeedd88e55..2e49f417a206 100644 --- a/contrib/llvm/tools/lld/COFF/Chunks.cpp +++ b/contrib/llvm/tools/lld/COFF/Chunks.cpp @@ -319,8 +319,48 @@ void SEHTableChunk::writeTo(uint8_t *Buf) const { std::sort(Begin, Begin + Cnt); } -// Windows-specific. -// This class represents a block in .reloc section. +// Windows-specific. This class represents a block in .reloc section. +// The format is described here. +// +// On Windows, each DLL is linked against a fixed base address and +// usually loaded to that address. However, if there's already another +// DLL that overlaps, the loader has to relocate it. To do that, DLLs +// contain .reloc sections which contain offsets that need to be fixed +// up at runtime. If the loader find that a DLL cannot be loaded to its +// desired base address, it loads it to somewhere else, and add - to each offset that is +// specified by .reloc section. +// +// In ELF terms, .reloc sections contain arrays of relocation offsets. +// All these offsets in the section are implicitly R_*_RELATIVE, and +// addends are read from section contents (so it is REL as opposed to +// RELA). +// +// This already reduce the size of relocations to 1/3 compared to ELF +// .dynrel, but Windows does more to reduce it (probably because it was +// invented for PCs in the late '80s or early '90s.) Offsets in .reloc +// are grouped by page where page size is 16 bits, and offsets sharing +// the same page address are stored consecutively to represent them with +// less space. This is a very similar to the page table which is grouped +// by (multiple stages of) pages. +// +// For example, let's say we have 0x00030, 0x00500, 0x01000, 0x01100, +// 0x20004, and 0x20008 in a .reloc section. In the section, they are +// represented like this: +// +// 0x00000 -- page address (4 bytes) +// 16 -- size of this block (4 bytes) +// 0x0030 -- entries (2 bytes each) +// 0x0500 +// 0x1000 +// 0x1100 +// 0x20000 -- page address (4 bytes) +// 12 -- size of this block (4 bytes) +// 0x0004 -- entries (2 bytes each) +// 0x0008 +// +// Usually we have a lot of relocatinos for each page, so the number of +// bytes for one .reloc entry is close to 2 bytes. BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { // Block header consists of 4 byte page RVA and 4 byte block size. // Each entry is 2 byte. Last entry may be padding. 
diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h index 31534aeb3971..fafd3bcde2e3 100644 --- a/contrib/llvm/tools/lld/COFF/Config.h +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -43,6 +43,7 @@ struct Export { bool Noname = false; bool Data = false; bool Private = false; + bool Constant = false; // If an export is a form of /export:foo=dllname.bar, that means // that foo should be exported as an alias to bar in the DLL. diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp index daddfb86d4cf..5a15b5b11507 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -512,6 +512,23 @@ void LinkerDriver::invokeMSVC(opt::InputArgList &Args) { std::string Rsp = "/nologo\n"; std::vector Temps; + // Write out archive members that we used in symbol resolution and pass these + // to MSVC before any archives, so that MSVC uses the same objects to satisfy + // references. + for (const auto *O : Symtab.ObjectFiles) { + if (O->ParentName.empty()) + continue; + SmallString<128> S; + int Fd; + if (auto EC = sys::fs::createTemporaryFile( + "lld-" + sys::path::filename(O->ParentName), ".obj", Fd, S)) + fatal(EC, "cannot create a temporary file"); + raw_fd_ostream OS(Fd, /*shouldClose*/ true); + OS << O->MB.getBuffer(); + Temps.push_back(S.str()); + Rsp += quote(S) + "\n"; + } + for (auto *Arg : Args) { switch (Arg->getOption().getID()) { case OPT_linkrepro: diff --git a/contrib/llvm/tools/lld/COFF/Driver.h b/contrib/llvm/tools/lld/COFF/Driver.h index 4566f73eef31..ad725625e030 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.h +++ b/contrib/llvm/tools/lld/COFF/Driver.h @@ -48,7 +48,7 @@ class ArgParser { llvm::opt::InputArgList parse(llvm::ArrayRef Args); // Concatenate LINK environment varirable and given arguments and parse them. - llvm::opt::InputArgList parseLINK(llvm::ArrayRef Args); + llvm::opt::InputArgList parseLINK(std::vector Args); // Tokenizes a given string and then parses as command line options. llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp index 2c9ba797f73b..252590c7d870 100644 --- a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp +++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp @@ -479,6 +479,10 @@ Export parseExport(StringRef Arg) { E.Data = true; continue; } + if (Tok.equals_lower("constant")) { + E.Constant = true; + continue; + } if (Tok.equals_lower("private")) { E.Private = true; continue; @@ -695,17 +699,20 @@ opt::InputArgList ArgParser::parse(ArrayRef ArgsArr) { return Args; } -// link.exe has an interesting feature. If LINK environment exists, -// its contents are handled as a command line string. So you can pass -// extra arguments using the environment variable. -opt::InputArgList ArgParser::parseLINK(ArrayRef Args) { +// link.exe has an interesting feature. If LINK or _LINK_ environment +// variables exist, their contents are handled as command line strings. +// So you can pass extra arguments using them. +opt::InputArgList ArgParser::parseLINK(std::vector Args) { // Concatenate LINK env and command line arguments, and then parse them. 
-  Optional Env = Process::GetEnv("LINK");
-  if (!Env)
-    return parse(Args);
-  std::vector V = tokenize(*Env);
-  V.insert(V.end(), Args.begin(), Args.end());
-  return parse(V);
+  if (Optional S = Process::GetEnv("LINK")) {
+    std::vector V = tokenize(*S);
+    Args.insert(Args.begin(), V.begin(), V.end());
+  }
+  if (Optional S = Process::GetEnv("_LINK_")) {
+    std::vector V = tokenize(*S);
+    Args.insert(Args.begin(), V.begin(), V.end());
+  }
+  return parse(Args);
 }
 std::vector ArgParser::tokenize(StringRef S) {
diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.h b/contrib/llvm/tools/lld/COFF/InputFiles.h
index 9e02b2fc68bb..3078de687525 100644
--- a/contrib/llvm/tools/lld/COFF/InputFiles.h
+++ b/contrib/llvm/tools/lld/COFF/InputFiles.h
@@ -58,6 +58,8 @@ class InputFile {
   // Returns the CPU type this file was compiled to.
   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
+  MemoryBufferRef MB;
+
   // An archive file name if this file is created from an archive.
   StringRef ParentName;
@@ -67,7 +69,6 @@ class InputFile {
 protected:
   InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
-  MemoryBufferRef MB;
   std::string Directives;
 private:
diff --git a/contrib/llvm/tools/lld/COFF/Librarian.cpp b/contrib/llvm/tools/lld/COFF/Librarian.cpp
index 3ce72822180b..91316ee6b0c9 100644
--- a/contrib/llvm/tools/lld/COFF/Librarian.cpp
+++ b/contrib/llvm/tools/lld/COFF/Librarian.cpp
@@ -162,7 +162,7 @@ class ObjectFactory {
   // Create a short import file which is described in PE/COFF spec 7. Import
   // Library Format.
   NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
-                                     ImportNameType NameType, bool isData);
+                                     ImportType Type, ImportNameType NameType);
 };
 }
@@ -440,8 +440,8 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) {
 NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
                                                   uint16_t Ordinal,
-                                                  ImportNameType NameType,
-                                                  bool isData) {
+                                                  ImportType ImportType,
+                                                  ImportNameType NameType) {
   size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs
   size_t Size = sizeof(coff_import_header) + ImpSize;
   char *Buf = Alloc.Allocate(Size);
@@ -456,8 +456,7 @@ NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
   Imp->SizeOfData = ImpSize;
   if (Ordinal > 0)
     Imp->OrdinalHint = Ordinal;
-  Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE);
-  Imp->TypeInfo |= NameType << 2;
+  Imp->TypeInfo = (NameType << 2) | ImportType;
   // Write symbol name and DLL name.
   memcpy(P, Sym.data(), Sym.size());
@@ -490,11 +489,18 @@ void lld::coff::writeImportLibrary() {
     if (E.Private)
       continue;
-    ImportNameType Type = getNameType(E.SymbolName, E.Name);
+    ImportType ImportType = IMPORT_CODE;
+    if (E.Data)
+      ImportType = IMPORT_DATA;
+    if (E.Constant)
+      ImportType = IMPORT_CONST;
+
+    ImportNameType NameType = getNameType(E.SymbolName, E.Name);
     std::string Name = E.ExtName.empty() ?
        std::string(E.SymbolName) : replace(E.SymbolName, E.Name, E.ExtName);
-    Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data));
+    Members.push_back(OF.createShortImport(Name, E.Ordinal, ImportType,
+                                           NameType));
   }
   std::pair Result =
diff --git a/contrib/llvm/tools/lld/COFF/ModuleDef.cpp b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp
index c9a40ac5ab8c..740ce867a7c4 100644
--- a/contrib/llvm/tools/lld/COFF/ModuleDef.cpp
+++ b/contrib/llvm/tools/lld/COFF/ModuleDef.cpp
@@ -38,6 +38,7 @@ enum Kind {
   Comma,
   Equal,
   KwBase,
+  KwConstant,
   KwData,
   KwExports,
   KwHeapsize,
@@ -92,6 +93,7 @@ class Lexer {
     StringRef Word = Buf.substr(0, End);
     Kind K = llvm::StringSwitch(Word)
                  .Case("BASE", KwBase)
+                 .Case("CONSTANT", KwConstant)
                  .Case("DATA", KwData)
                  .Case("EXPORTS", KwExports)
                  .Case("HEAPSIZE", KwHeapsize)
@@ -227,6 +229,11 @@ class Parser {
       E.Data = true;
       continue;
     }
+    if (Tok.K == KwConstant) {
+      warn("CONSTANT keyword is obsolete; use DATA");
+      E.Constant = true;
+      continue;
+    }
     if (Tok.K == KwPrivate) {
       E.Private = true;
       continue;
diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.h b/contrib/llvm/tools/lld/COFF/SymbolTable.h
index 764dd5318775..bf8d6618d964 100644
--- a/contrib/llvm/tools/lld/COFF/SymbolTable.h
+++ b/contrib/llvm/tools/lld/COFF/SymbolTable.h
@@ -108,14 +108,9 @@ class SymbolTable {
   std::vector LocalImportChunks;
 private:
-  void readArchive();
-  void readObjects();
-
   std::pair insert(StringRef Name);
   StringRef findByPrefix(StringRef Prefix);
-  void addCombinedLTOObject(ObjectFile *Obj);
-
   llvm::DenseMap Symtab;
   std::vector BitcodeFiles;
diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp
index 93924e4554c9..68eb5616a5c6 100644
--- a/contrib/llvm/tools/lld/ELF/Driver.cpp
+++ b/contrib/llvm/tools/lld/ELF/Driver.cpp
@@ -184,8 +184,6 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
       error("attempted static link of dynamic object " + Path);
       return;
     }
-    Files.push_back(createSharedFile(MBRef));
-
     // DSOs usually have DT_SONAME tags in their ELF headers, and the
     // sonames are used to identify DSOs. But if they are missing,
     // they are identified by filenames. We don't know whether the new
@@ -196,8 +194,8 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
     // If a file was specified by -lfoo, the directory part is not
    // significant, as a user did not specify it. This behavior is
     // compatible with GNU.
-    Files.back()->DefaultSoName =
-        WithLOption ? sys::path::filename(Path) : Path;
+    Files.push_back(createSharedFile(
+        MBRef, WithLOption ?
        sys::path::filename(Path) : Path));
     return;
   default:
     if (InLib)
@@ -708,10 +706,6 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
   if (!Config->Shared && !Config->AuxiliaryList.empty())
     error("-f may not be used without -shared");
-  for (auto *Arg : Args.filtered(OPT_dynamic_list))
-    if (Optional Buffer = readFile(Arg->getValue()))
-      readDynamicList(*Buffer);
-
   if (auto *Arg = Args.getLastArg(OPT_symbol_ordering_file))
     if (Optional Buffer = readFile(Arg->getValue()))
       Config->SymbolOrderingFile = getLines(*Buffer);
@@ -726,22 +720,32 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
         {S, /*IsExternCpp*/ false, /*HasWildcard*/ false});
   }
-  for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
-    Config->VersionScriptGlobals.push_back(
-        {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false});
+  bool HasExportDynamic =
+      getArg(Args, OPT_export_dynamic, OPT_no_export_dynamic, false);
-  // Dynamic lists are a simplified linker script that doesn't need the
-  // "global:" and implicitly ends with a "local:*". Set the variables needed to
-  // simulate that.
-  if (Args.hasArg(OPT_dynamic_list) || Args.hasArg(OPT_export_dynamic_symbol)) {
-    Config->ExportDynamic = true;
-    if (!Config->Shared)
-      Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+  // Parses -dynamic-list and -export-dynamic-symbol. They make some
+  // symbols private. Note that -export-dynamic takes precedence over them
+  // as it says all symbols should be exported.
+  if (!HasExportDynamic) {
+    for (auto *Arg : Args.filtered(OPT_dynamic_list))
+      if (Optional Buffer = readFile(Arg->getValue()))
+        readDynamicList(*Buffer);
+
+    for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol))
+      Config->VersionScriptGlobals.push_back(
+          {Arg->getValue(), /*IsExternCpp*/ false, /*HasWildcard*/ false});
+
+    // Dynamic lists are a simplified linker script that doesn't need the
+    // "global:" and implicitly ends with a "local:*". Set the variables
+    // needed to simulate that.
+    if (Args.hasArg(OPT_dynamic_list) ||
+        Args.hasArg(OPT_export_dynamic_symbol)) {
+      Config->ExportDynamic = true;
+      if (!Config->Shared)
+        Config->DefaultSymbolVersion = VER_NDX_LOCAL;
+    }
   }
-  if (getArg(Args, OPT_export_dynamic, OPT_no_export_dynamic, false))
-    Config->DefaultSymbolVersion = VER_NDX_GLOBAL;
-
   if (auto *Arg = Args.getLastArg(OPT_version_script))
     if (Optional Buffer = readFile(Arg->getValue()))
       readVersionScript(*Buffer);
@@ -876,6 +880,21 @@ static uint64_t getImageBase(opt::InputArgList &Args) {
   return V;
 }
+// Parses --defsym=alias option.
+static std::vector>
+getDefsym(opt::InputArgList &Args) {
+  std::vector> Ret;
+  for (auto *Arg : Args.filtered(OPT_defsym)) {
+    StringRef From;
+    StringRef To;
+    std::tie(From, To) = StringRef(Arg->getValue()).split('=');
+    if (!isValidCIdentifier(To))
+      error("--defsym: symbol name expected, but got " + To);
+    Ret.push_back({From, To});
+  }
+  return Ret;
+}
+
 // Do actual linking. Note that when this function is called,
 // all linker scripts have already been parsed.
 template void LinkerDriver::link(opt::InputArgList &Args) {
@@ -893,9 +912,11 @@ template void LinkerDriver::link(opt::InputArgList &Args) {
   // Fail early if the output file or map file is not writable. If a user has a
   // long link, e.g. due to a large LTO link, they do not wish to run it and
   // find that it failed because there was a mistake in their command-line.
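The getDefsym() helper in the hunk above splits each --defsym value at its first '='. As a hedged illustration of that parsing (splitDefsym is a hypothetical stand-alone helper, not lld code), kept separate from the link() changes that follow:

#include <string>
#include <utility>

// Split a --defsym value of the form "from=to" at the first '='.
// The real code additionally checks that "to" is a valid C identifier
// and reports an error otherwise.
static std::pair<std::string, std::string> splitDefsym(const std::string &Arg) {
  std::string::size_type Pos = Arg.find('=');
  if (Pos == std::string::npos)
    return {Arg, std::string()};      // malformed; real code diagnoses this
  return {Arg.substr(0, Pos), Arg.substr(Pos + 1)};
}

For instance, --defsym=foo=bar yields the pair ("foo", "bar"), which the driver below then hands to Symtab.alias(Def.first, Def.second).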
-  if (!isFileWritable(Config->OutputFile, "output file"))
-    return;
-  if (!isFileWritable(Config->MapFile, "map file"))
+  if (auto E = tryCreateFile(Config->OutputFile))
+    error("cannot open output file " + Config->OutputFile + ": " + E.message());
+  if (auto E = tryCreateFile(Config->MapFile))
+    error("cannot open map file " + Config->MapFile + ": " + E.message());
+  if (ErrorCount)
     return;
   // Use default entry point name if no name was given via the command
@@ -941,6 +962,10 @@ template void LinkerDriver::link(opt::InputArgList &Args) {
   for (auto *Arg : Args.filtered(OPT_wrap))
     Symtab.wrap(Arg->getValue());
+  // Handle --defsym=sym=alias option.
+  for (std::pair &Def : getDefsym(Args))
+    Symtab.alias(Def.first, Def.second);
+
   // Now that we have a complete list of input files.
   // Beyond this point, no new files are added.
   // Aggregate all input sections into one place.
diff --git a/contrib/llvm/tools/lld/ELF/Filesystem.cpp b/contrib/llvm/tools/lld/ELF/Filesystem.cpp
index 75f7bda75a23..b63d521a83b0 100644
--- a/contrib/llvm/tools/lld/ELF/Filesystem.cpp
+++ b/contrib/llvm/tools/lld/ELF/Filesystem.cpp
@@ -13,7 +13,6 @@
 #include "Filesystem.h"
 #include "Config.h"
-#include "Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include
@@ -58,22 +57,20 @@ void elf::unlinkAsync(StringRef Path) {
   std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach();
 }
-// Returns true if a given file seems to be writable.
+// Simulate file creation to see if Path is writable.
 //
 // Determining whether a file is writable or not is amazingly hard,
 // and after all the only reliable way of doing that is to actually
 // create a file. But we don't want to do that in this function
 // because LLD shouldn't update any file if it will end in a failure.
-// We also don't want to reimplement heuristics. So we'll let
-// FileOutputBuffer do the work.
+// We also don't want to reimplement heuristics to determine if a
+// file is writable. So we'll let FileOutputBuffer do the work.
 //
 // FileOutputBuffer doesn't touch a destination file until commit()
 // is called. We use that class without calling commit() to predict
 // if the given file is writable.
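For contrast with the probe described in the comment above, here is a hedged sketch (not lld code; writeZeros is a hypothetical helper, and it assumes the ErrorOr/std::error_code-flavored FileOutputBuffer API that these hunks use) of a writer that does go through with the write. Only the final commit() touches the destination, which is why tryCreateFile() below can create the buffer and simply let it go out of scope.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <cstring>
#include <memory>
#include <system_error>

using namespace llvm;

// Create a buffer for the destination, fill it with zeros, then commit().
static std::error_code writeZeros(StringRef Path, size_t Size) {
  ErrorOr<std::unique_ptr<FileOutputBuffer>> BufOrErr =
      FileOutputBuffer::create(Path, Size);
  if (std::error_code EC = BufOrErr.getError())
    return EC;                       // destination left untouched on error
  std::unique_ptr<FileOutputBuffer> &Buf = *BufOrErr;
  std::memset(Buf->getBufferStart(), 0, Buf->getBufferSize());
  return Buf->commit();              // the only call that modifies the file
}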
-bool elf::isFileWritable(StringRef Path, StringRef Desc) {
-  if (auto EC = FileOutputBuffer::create(Path, 1).getError()) {
-    error("cannot open " + Desc + " " + Path + ": " + EC.message());
-    return false;
-  }
-  return true;
+std::error_code elf::tryCreateFile(StringRef Path) {
+  if (Path.empty())
+    return std::error_code();
+  return FileOutputBuffer::create(Path, 1).getError();
 }
diff --git a/contrib/llvm/tools/lld/ELF/Filesystem.h b/contrib/llvm/tools/lld/ELF/Filesystem.h
index a33dc3651a4a..d56d067f7378 100644
--- a/contrib/llvm/tools/lld/ELF/Filesystem.h
+++ b/contrib/llvm/tools/lld/ELF/Filesystem.h
@@ -15,7 +15,7 @@ namespace lld {
 namespace elf {
 void unlinkAsync(StringRef Path);
-bool isFileWritable(StringRef Path, StringRef FileDescription);
+std::error_code tryCreateFile(StringRef Path);
 }
 }
diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp
index d651fbcad253..d99f71eb4aae 100644
--- a/contrib/llvm/tools/lld/ELF/InputFiles.cpp
+++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp
@@ -608,8 +608,9 @@ ArchiveFile::getMember(const Archive::Symbol *Sym) {
 }
 template
-SharedFile::SharedFile(MemoryBufferRef M)
-    : ELFFileBase(Base::SharedKind, M), AsNeeded(Config->AsNeeded) {}
+SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
+    : ELFFileBase(Base::SharedKind, M), SoName(DefaultSoName),
+      AsNeeded(Config->AsNeeded) {}
 template
 const typename ELFT::Shdr *
@@ -619,12 +620,6 @@ SharedFile::getSection(const Elf_Sym &Sym) const {
             toString(this));
 }
-template StringRef SharedFile::getSoName() const {
-  if (SoName.empty())
-    return this->DefaultSoName;
-  return SoName;
-}
-
 // Partially parse the shared object file so that we can call
 // getSoName on this object.
 template void SharedFile::parseSoName() {
@@ -867,8 +862,23 @@ void BitcodeFile::parse(DenseSet &ComdatGroups) {
     Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, this));
 }
+// Small bit of template meta programming to handle the SharedFile constructor
+// being the only one with a DefaultSoName parameter.
+template