Merge llvm, clang, lld, lldb, compiler-rt and libc++ r308421, and update build glue.
Dimitry Andric 2017-07-19 19:41:41 +00:00
commit b40b48b876
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang500-import/; revision=321238
532 changed files with 16648 additions and 6068 deletions

View File

@ -21,7 +21,9 @@
#include "asan_report.h"
#include "asan_stack.h"
#include "asan_thread.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_list.h"
@ -799,11 +801,6 @@ void PrintInternalAllocatorStats() {
instance.PrintStats();
}
void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
AllocType alloc_type) {
return instance.Allocate(size, alignment, stack, alloc_type, true);
}
void asan_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
instance.Deallocate(ptr, 0, stack, alloc_type);
}
@ -814,16 +811,16 @@ void asan_sized_free(void *ptr, uptr size, BufferedStackTrace *stack,
}
void *asan_malloc(uptr size, BufferedStackTrace *stack) {
return instance.Allocate(size, 8, stack, FROM_MALLOC, true);
return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true));
}
void *asan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
return instance.Calloc(nmemb, size, stack);
return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}
void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) {
if (!p)
return instance.Allocate(size, 8, stack, FROM_MALLOC, true);
return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true));
if (size == 0) {
if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
instance.Deallocate(p, 0, stack, FROM_MALLOC);
@ -832,26 +829,41 @@ void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) {
// Allocate a size of 1 if we shouldn't free() on Realloc to 0
size = 1;
}
return instance.Reallocate(p, size, stack);
return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}
void *asan_valloc(uptr size, BufferedStackTrace *stack) {
return instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true);
return SetErrnoOnNull(
instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true));
}
void *asan_pvalloc(uptr size, BufferedStackTrace *stack) {
uptr PageSize = GetPageSizeCached();
size = RoundUpTo(size, PageSize);
if (size == 0) {
// pvalloc(0) should allocate one page.
size = PageSize;
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
return SetErrnoOnNull(
instance.Allocate(size, PageSize, stack, FROM_MALLOC, true));
}
void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
AllocType alloc_type) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return AsanAllocator::FailureHandler::OnBadRequest();
}
return instance.Allocate(size, PageSize, stack, FROM_MALLOC, true);
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, alloc_type, true));
}
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
AsanAllocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
}
void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true);
if (UNLIKELY(!ptr))
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
return 0;
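
For illustration, a minimal standalone sketch (not part of this commit) of the calling contract these wrappers now implement: malloc-family failures set errno to ENOMEM, posix_memalign reports EINVAL/ENOMEM through its return value only, and pvalloc(0) is rounded up to a full page.

// Hypothetical caller, shown against the plain libc API for clarity.
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

int main() {
  errno = 0;
  void *p = malloc((size_t)-1);          // unsatisfiable request
  if (!p && errno == ENOMEM)
    puts("malloc failure sets errno to ENOMEM");

  void *q = nullptr;
  int rc = posix_memalign(&q, 3, 64);    // 3 is not a power of two
  if (rc == EINVAL)
    puts("posix_memalign reports EINVAL via its return value, not errno");
  free(q);

  long page = sysconf(_SC_PAGESIZE);
  printf("pvalloc(0) rounds the request up to one page (%ld bytes here)\n", page);
  return 0;
}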

View File

@ -178,6 +178,10 @@ void SetThreadName(const char *name) {
}
int OnExit() {
if (CAN_SANITIZE_LEAKS && common_flags()->detect_leaks &&
__lsan::HasReportedLeaks()) {
return common_flags()->exitcode;
}
// FIXME: ask frontend whether we need to return failure.
return 0;
}

View File

@ -190,8 +190,8 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
@ -200,6 +200,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#else
return true;
#endif
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
@ -211,35 +221,6 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
#else
return true;
#endif
#elif defined(__i386__) || defined(_M_IX86)
#if defined(__GNUC__) || defined(__clang__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#elif defined(_MSC_VER)
__asm {
mov eax,value
mov ecx,subleaf
cpuid
mov esi,rEAX
mov dword ptr [esi],eax
mov esi,rEBX
mov dword ptr [esi],ebx
mov esi,rECX
mov dword ptr [esi],ecx
mov esi,rEDX
mov dword ptr [esi],edx
}
return false;
#else
return true;
#endif
#else
return true;
#endif
}
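
The asm above saves rbx/ebx around cpuid because the compiler may reserve that register (for example as the PIC base on i386), so it cannot simply be listed as a clobber. A standalone sketch of the same pattern for x86-64 with GCC or Clang (assumed toolchain, not part of this commit):

#include <cstdio>
#include <cstring>

static void cpuid_ex(unsigned leaf, unsigned subleaf,
                     unsigned *a, unsigned *b, unsigned *c, unsigned *d) {
  // Stash rbx in rsi around the instruction instead of declaring it clobbered.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*a), "=S"(*b), "=c"(*c), "=d"(*d)
          : "a"(leaf), "c"(subleaf));
}

int main() {
  unsigned a, b, c, d;
  cpuid_ex(0, 0, &a, &b, &c, &d);   // leaf 0: vendor string in EBX, EDX, ECX
  char vendor[13] = {};
  memcpy(vendor + 0, &b, 4);
  memcpy(vendor + 4, &d, 4);
  memcpy(vendor + 8, &c, 4);
  printf("CPU vendor: %s\n", vendor);
  return 0;
}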
// Read control register 0 (XCR0). Used to detect features such as AVX.

View File

@ -15,7 +15,9 @@
#include "lsan_allocator.h"
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_stacktrace.h"
@ -86,6 +88,13 @@ void *Allocate(const StackTrace &stack, uptr size, uptr alignment,
return p;
}
static void *Calloc(uptr nmemb, uptr size, const StackTrace &stack) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
return Allocator::FailureHandler::OnBadRequest();
size *= nmemb;
return Allocate(stack, size, 1, true);
}
void Deallocate(void *p) {
if (&__sanitizer_free_hook) __sanitizer_free_hook(p);
RunFreeHooks(p);
@ -118,11 +127,15 @@ uptr GetMallocUsableSize(const void *p) {
}
void *lsan_memalign(uptr alignment, uptr size, const StackTrace &stack) {
return Allocate(stack, size, alignment, kAlwaysClearMemory);
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
}
return SetErrnoOnNull(Allocate(stack, size, alignment, kAlwaysClearMemory));
}
void *lsan_malloc(uptr size, const StackTrace &stack) {
return Allocate(stack, size, 1, kAlwaysClearMemory);
return SetErrnoOnNull(Allocate(stack, size, 1, kAlwaysClearMemory));
}
void lsan_free(void *p) {
@ -130,20 +143,16 @@ void lsan_free(void *p) {
}
void *lsan_realloc(void *p, uptr size, const StackTrace &stack) {
return Reallocate(stack, p, size, 1);
return SetErrnoOnNull(Reallocate(stack, p, size, 1));
}
void *lsan_calloc(uptr nmemb, uptr size, const StackTrace &stack) {
if (CheckForCallocOverflow(size, nmemb))
return Allocator::FailureHandler::OnBadRequest();
size *= nmemb;
return Allocate(stack, size, 1, true);
return SetErrnoOnNull(Calloc(nmemb, size, stack));
}
void *lsan_valloc(uptr size, const StackTrace &stack) {
if (size == 0)
size = GetPageSizeCached();
return Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory);
return SetErrnoOnNull(
Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory));
}
uptr lsan_mz_size(const void *p) {

View File

@ -576,18 +576,16 @@ static bool CheckForLeaks() {
return false;
}
static bool has_reported_leaks = false;
bool HasReportedLeaks() { return has_reported_leaks; }
void DoLeakCheck() {
BlockingMutexLock l(&global_mutex);
static bool already_done;
if (already_done) return;
already_done = true;
bool have_leaks = CheckForLeaks();
if (!have_leaks) {
return;
}
if (common_flags()->exitcode) {
Die();
}
has_reported_leaks = CheckForLeaks();
if (has_reported_leaks) HandleLeaks();
}
static int DoRecoverableLeakCheck() {

View File

@ -226,6 +226,12 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p);
// Return the linker module, if valid for the platform.
LoadedModule *GetLinker();
// Return true if LSan has finished leak checking and reported leaks.
bool HasReportedLeaks();
// Run platform-specific leak handlers.
void HandleLeaks();
// Wrapper for chunk metadata operations.
class LsanMetadata {
public:

View File

@ -100,6 +100,13 @@ struct DoStopTheWorldParam {
void *argument;
};
// While calling Die() here is undefined behavior and can potentially
// cause race conditions, it isn't possible to intercept exit on linux,
// so we have no choice but to call Die() from the atexit handler.
void HandleLeaks() {
if (common_flags()->exitcode) Die();
}
static int DoStopTheWorldCallback(struct dl_phdr_info *info, size_t size,
void *data) {
DoStopTheWorldParam *param = reinterpret_cast<DoStopTheWorldParam *>(data);

View File

@ -164,6 +164,11 @@ void ProcessPlatformSpecificAllocations(Frontier *frontier) {
}
}
// On darwin, we can intercept _exit gracefully, and return a failing exit code
// if required at that point. Calling Die() here is undefined behavior and
// causes rare race conditions.
void HandleLeaks() {}
void DoStopTheWorld(StopTheWorldCallback callback, void *argument) {
StopTheWorld(callback, argument);
}

View File

@ -352,6 +352,11 @@ INTERCEPTOR(int, pthread_join, void *th, void **ret) {
return res;
}
INTERCEPTOR(void, _exit, int status) {
if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode;
REAL(_exit)(status);
}
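
The interceptor's only job is to keep a successful-looking exit status from masking reported leaks; a restatement of that logic in isolation (names are placeholders, not sanitizer API):

int rewrite_exit_status(int status, bool leaks_reported, int configured_exitcode) {
  // _exit(0) after a leak report is promoted to the configured failure code;
  // any explicit non-zero status is left alone.
  if (status == 0 && leaks_reported)
    return configured_exitcode;
  return status;
}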
namespace __lsan {
void InitializeInterceptors() {
@ -371,6 +376,7 @@ void InitializeInterceptors() {
LSAN_MAYBE_INTERCEPT_MALLOPT;
INTERCEPT_FUNCTION(pthread_create);
INTERCEPT_FUNCTION(pthread_join);
INTERCEPT_FUNCTION(_exit);
if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) {
Report("LeakSanitizer: failed to create thread key.\n");

View File

@ -280,10 +280,18 @@ void InitializeInterceptors();
void MsanAllocatorInit();
void MsanAllocatorThreadFinish();
void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size);
void *MsanReallocate(StackTrace *stack, void *oldp, uptr size,
uptr alignment, bool zeroise);
void MsanDeallocate(StackTrace *stack, void *ptr);
void *msan_malloc(uptr size, StackTrace *stack);
void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack);
void *msan_realloc(void *ptr, uptr size, StackTrace *stack);
void *msan_valloc(uptr size, StackTrace *stack);
void *msan_pvalloc(uptr size, StackTrace *stack);
void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack);
void *msan_memalign(uptr alignment, uptr size, StackTrace *stack);
int msan_posix_memalign(void **memptr, uptr alignment, uptr size,
StackTrace *stack);
void InstallTrapHandler();
void InstallAtExitHandler();

View File

@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "msan.h"
#include "msan_allocator.h"
#include "msan_origin.h"
@ -194,20 +196,8 @@ void MsanDeallocate(StackTrace *stack, void *p) {
}
}
void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
if (CheckForCallocOverflow(size, nmemb))
return Allocator::FailureHandler::OnBadRequest();
return MsanReallocate(stack, nullptr, nmemb * size, sizeof(u64), true);
}
void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size,
uptr alignment, bool zeroise) {
if (!old_p)
return MsanAllocate(stack, new_size, alignment, zeroise);
if (!new_size) {
MsanDeallocate(stack, old_p);
return nullptr;
}
uptr alignment) {
Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(old_p));
uptr old_size = meta->requested_size;
uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p);
@ -215,10 +205,7 @@ void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size,
// We are not reallocating here.
meta->requested_size = new_size;
if (new_size > old_size) {
if (zeroise) {
__msan_clear_and_unpoison((char *)old_p + old_size,
new_size - old_size);
} else if (flags()->poison_in_malloc) {
if (flags()->poison_in_malloc) {
stack->tag = StackTrace::TAG_ALLOC;
PoisonMemory((char *)old_p + old_size, new_size - old_size, stack);
}
@ -226,8 +213,7 @@ void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size,
return old_p;
}
uptr memcpy_size = Min(new_size, old_size);
void *new_p = MsanAllocate(stack, new_size, alignment, zeroise);
// Printf("realloc: old_size %zd new_size %zd\n", old_size, new_size);
void *new_p = MsanAllocate(stack, new_size, alignment, false /*zeroise*/);
if (new_p) {
CopyMemory(new_p, old_p, memcpy_size, stack);
MsanDeallocate(stack, old_p);
@ -243,6 +229,67 @@ static uptr AllocationSize(const void *p) {
return b->requested_size;
}
void *msan_malloc(uptr size, StackTrace *stack) {
return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false));
}
void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest());
return SetErrnoOnNull(MsanAllocate(stack, nmemb * size, sizeof(u64), true));
}
void *msan_realloc(void *ptr, uptr size, StackTrace *stack) {
if (!ptr)
return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false));
if (size == 0) {
MsanDeallocate(stack, ptr);
return nullptr;
}
return SetErrnoOnNull(MsanReallocate(stack, ptr, size, sizeof(u64)));
}
void *msan_valloc(uptr size, StackTrace *stack) {
return SetErrnoOnNull(MsanAllocate(stack, size, GetPageSizeCached(), false));
}
void *msan_pvalloc(uptr size, StackTrace *stack) {
uptr PageSize = GetPageSizeCached();
// pvalloc(0) should allocate one page.
size = size == 0 ? PageSize : RoundUpTo(size, PageSize);
return SetErrnoOnNull(MsanAllocate(stack, size, PageSize, false));
}
void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
}
return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false));
}
void *msan_memalign(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
}
return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false));
}
int msan_posix_memalign(void **memptr, uptr alignment, uptr size,
StackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
Allocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
}
void *ptr = MsanAllocate(stack, size, alignment, false);
if (UNLIKELY(!ptr))
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
return 0;
}
} // namespace __msan
using namespace __msan;
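
msan_realloc above folds the two realloc edge cases into explicit branches; the observable behavior matches the usual contract, sketched here with plain libc realloc (illustrative only):

#include <cstdio>
#include <cstdlib>

int main() {
  void *p = realloc(nullptr, 32);   // null input behaves like malloc(32)
  printf("after realloc(nullptr, 32): %p\n", p);
  p = realloc(p, 0);                // size 0 frees; msan_realloc returns nullptr here
  printf("after realloc(p, 0): %p\n", p);
  free(p);                          // safe whether or not the libc returned nullptr
  return 0;
}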

View File

@ -161,58 +161,45 @@ INTERCEPTOR(void *, bcopy, const void *src, void *dest, SIZE_T n) {
INTERCEPTOR(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
CHECK_EQ(alignment & (alignment - 1), 0);
CHECK_NE(memptr, 0);
*memptr = MsanReallocate(&stack, nullptr, size, alignment, false);
CHECK_NE(*memptr, 0);
__msan_unpoison(memptr, sizeof(*memptr));
return 0;
int res = msan_posix_memalign(memptr, alignment, size, &stack);
if (!res)
__msan_unpoison(memptr, sizeof(*memptr));
return res;
}
#if !SANITIZER_FREEBSD
INTERCEPTOR(void *, memalign, SIZE_T boundary, SIZE_T size) {
INTERCEPTOR(void *, memalign, SIZE_T alignment, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
CHECK_EQ(boundary & (boundary - 1), 0);
void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
return ptr;
return msan_memalign(alignment, size, &stack);
}
#define MSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign)
#else
#define MSAN_MAYBE_INTERCEPT_MEMALIGN
#endif
INTERCEPTOR(void *, aligned_alloc, SIZE_T boundary, SIZE_T size) {
INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
CHECK_EQ(boundary & (boundary - 1), 0);
void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
return ptr;
return msan_aligned_alloc(alignment, size, &stack);
}
INTERCEPTOR(void *, __libc_memalign, SIZE_T boundary, SIZE_T size) {
INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
CHECK_EQ(boundary & (boundary - 1), 0);
void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
DTLS_on_libc_memalign(ptr, size);
void *ptr = msan_memalign(alignment, size, &stack);
if (ptr)
DTLS_on_libc_memalign(ptr, size);
return ptr;
}
INTERCEPTOR(void *, valloc, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
void *ptr = MsanReallocate(&stack, nullptr, size, GetPageSizeCached(), false);
return ptr;
return msan_valloc(size, &stack);
}
#if !SANITIZER_FREEBSD
INTERCEPTOR(void *, pvalloc, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
uptr PageSize = GetPageSizeCached();
size = RoundUpTo(size, PageSize);
if (size == 0) {
// pvalloc(0) should allocate one page.
size = PageSize;
}
void *ptr = MsanReallocate(&stack, nullptr, size, PageSize, false);
return ptr;
return msan_pvalloc(size, &stack);
}
#define MSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc)
#else
@ -853,7 +840,7 @@ INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
if (UNLIKELY(!msan_inited))
// Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
return AllocateFromLocalPool(nmemb * size);
return MsanCalloc(&stack, nmemb, size);
return msan_calloc(nmemb, size, &stack);
}
INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
@ -866,12 +853,12 @@ INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
new_ptr = AllocateFromLocalPool(copy_size);
} else {
copy_size = size;
new_ptr = MsanReallocate(&stack, nullptr, copy_size, sizeof(u64), false);
new_ptr = msan_malloc(copy_size, &stack);
}
internal_memcpy(new_ptr, ptr, copy_size);
return new_ptr;
}
return MsanReallocate(&stack, ptr, size, sizeof(u64), false);
return msan_realloc(ptr, size, &stack);
}
INTERCEPTOR(void *, malloc, SIZE_T size) {
@ -879,7 +866,7 @@ INTERCEPTOR(void *, malloc, SIZE_T size) {
if (UNLIKELY(!msan_inited))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
return MsanReallocate(&stack, nullptr, size, sizeof(u64), false);
return msan_malloc(size, &stack);
}
void __msan_allocated_memory(const void *data, uptr size) {

View File

@ -31,7 +31,7 @@ namespace std {
// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
#define OPERATOR_NEW_BODY(nothrow) \
GET_MALLOC_STACK_TRACE; \
void *res = MsanReallocate(&stack, 0, size, sizeof(u64), false);\
void *res = msan_malloc(size, &stack);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
return res

View File

@ -14,6 +14,7 @@
#include "sanitizer_allocator.h"
#include "sanitizer_allocator_checks.h"
#include "sanitizer_allocator_internal.h"
#include "sanitizer_atomic.h"
#include "sanitizer_common.h"
@ -160,7 +161,7 @@ void *InternalRealloc(void *addr, uptr size, InternalAllocatorCache *cache) {
}
void *InternalCalloc(uptr count, uptr size, InternalAllocatorCache *cache) {
if (CheckForCallocOverflow(count, size))
if (UNLIKELY(CheckForCallocOverflow(count, size)))
return InternalAllocator::FailureHandler::OnBadRequest();
void *p = InternalAlloc(count * size, cache);
if (p) internal_memset(p, 0, count * size);
@ -202,12 +203,6 @@ void SetLowLevelAllocateCallback(LowLevelAllocateCallback callback) {
low_level_alloc_callback = callback;
}
bool CheckForCallocOverflow(uptr size, uptr n) {
if (!size) return false;
uptr max = (uptr)-1L;
return (max / size) < n;
}
static atomic_uint8_t allocator_out_of_memory = {0};
static atomic_uint8_t allocator_may_return_null = {0};

View File

@ -56,11 +56,6 @@ struct NoOpMapUnmapCallback {
// Callback type for iterating over chunks.
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
// Returns true if calloc(size, n) call overflows on size*n calculation.
// The caller should "return POLICY::OnBadRequest();" where POLICY is the
// current allocator failure handling policy.
bool CheckForCallocOverflow(uptr size, uptr n);
#include "sanitizer_allocator_size_class_map.h"
#include "sanitizer_allocator_stats.h"
#include "sanitizer_allocator_primary64.h"

View File

@ -0,0 +1,64 @@
//===-- sanitizer_allocator_checks.h ----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Various checks shared between ThreadSanitizer, MemorySanitizer, etc. memory
// allocators.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_ALLOCATOR_CHECKS_H
#define SANITIZER_ALLOCATOR_CHECKS_H
#include "sanitizer_errno.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_platform.h"
namespace __sanitizer {
// A common errno setting logic shared by almost all sanitizer allocator APIs.
INLINE void *SetErrnoOnNull(void *ptr) {
if (UNLIKELY(!ptr))
errno = errno_ENOMEM;
return ptr;
}
// In case of the check failure, the caller of the following Check... functions
// should "return POLICY::OnBadRequest();" where POLICY is the current allocator
// failure handling policy.
// Checks aligned_alloc() parameters. On POSIX implementations the alignment
// must be a power of two and the size a multiple of the alignment; non-POSIX
// implementations only require the size to be a multiple of the alignment.
INLINE bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) {
#if SANITIZER_POSIX
return IsPowerOfTwo(alignment) && (size & (alignment - 1)) == 0;
#else
return size % alignment == 0;
#endif
}
// Checks posix_memalign() parameters, verifies that alignment is a power of two
// and a multiple of sizeof(void *).
INLINE bool CheckPosixMemalignAlignment(uptr alignment) {
return IsPowerOfTwo(alignment) && (alignment % sizeof(void *)) == 0; // NOLINT
}
// Returns true if calloc(size, n) call overflows on size*n calculation.
INLINE bool CheckForCallocOverflow(uptr size, uptr n) {
if (!size)
return false;
uptr max = (uptr)-1L;
return (max / size) < n;
}
} // namespace __sanitizer
#endif // SANITIZER_ALLOCATOR_CHECKS_H
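
A sketch of how an allocator front end composes these helpers; the real call sites are the asan_/lsan_/msan_ wrappers elsewhere in this commit, and the names below are stand-ins:

#include <cstdint>

using uptr = uintptr_t;

// Same logic as CheckForCallocOverflow(): true means size * n would wrap.
static bool calloc_overflows(uptr size, uptr n) {
  if (!size) return false;
  uptr max = (uptr)-1L;
  return (max / size) < n;
}

void *calloc_front_end(uptr nmemb, uptr size, void *(*raw_alloc)(uptr)) {
  if (calloc_overflows(size, nmemb))
    return nullptr;                  // real code: FailureHandler::OnBadRequest()
  void *p = raw_alloc(nmemb * size); // multiplication is safe after the check
  // Real code wraps this in SetErrnoOnNull(), which sets errno to ENOMEM when
  // the allocation failed and returns the pointer unchanged otherwise.
  return p;
}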

View File

@ -26,6 +26,8 @@
# define __errno_location __error
#elif SANITIZER_ANDROID
# define __errno_location __errno
#elif SANITIZER_WINDOWS
# define __errno_location _errno
#endif
extern "C" int *__errno_location();

View File

@ -629,8 +629,7 @@ uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) {
}
#endif
uptr internal_sigaltstack(const struct sigaltstack *ss,
struct sigaltstack *oss) {
uptr internal_sigaltstack(const void *ss, void *oss) {
return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss);
}

View File

@ -21,7 +21,6 @@
#include "sanitizer_platform_limits_posix.h"
struct link_map; // Opaque type returned by dlopen().
struct sigaltstack;
namespace __sanitizer {
// Dirent structure for getdents(). Note that this structure is different from
@ -30,8 +29,7 @@ struct linux_dirent;
// Syscall wrappers.
uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count);
uptr internal_sigaltstack(const struct sigaltstack* ss,
struct sigaltstack* oss);
uptr internal_sigaltstack(const void* ss, void* oss);
uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset);

View File

@ -805,14 +805,35 @@ char **GetArgv() {
// fields only available in 10.12+. Declare the struct manually to be able to
// build against older SDKs.
struct __sanitizer_task_vm_info {
uptr _unused[(SANITIZER_WORDSIZE == 32) ? 20 : 19];
uptr min_address;
uptr max_address;
mach_vm_size_t virtual_size;
integer_t region_count;
integer_t page_size;
mach_vm_size_t resident_size;
mach_vm_size_t resident_size_peak;
mach_vm_size_t device;
mach_vm_size_t device_peak;
mach_vm_size_t internal;
mach_vm_size_t internal_peak;
mach_vm_size_t external;
mach_vm_size_t external_peak;
mach_vm_size_t reusable;
mach_vm_size_t reusable_peak;
mach_vm_size_t purgeable_volatile_pmap;
mach_vm_size_t purgeable_volatile_resident;
mach_vm_size_t purgeable_volatile_virtual;
mach_vm_size_t compressed;
mach_vm_size_t compressed_peak;
mach_vm_size_t compressed_lifetime;
mach_vm_size_t phys_footprint;
mach_vm_address_t min_address;
mach_vm_address_t max_address;
};
#define __SANITIZER_TASK_VM_INFO_COUNT ((mach_msg_type_number_t) \
(sizeof(__sanitizer_task_vm_info) / sizeof(natural_t)))
uptr GetTaskInfoMaxAddress() {
__sanitizer_task_vm_info vm_info = {{0}, 0, 0};
mach_msg_type_number_t count = sizeof(vm_info) / sizeof(int);
__sanitizer_task_vm_info vm_info = {};
mach_msg_type_number_t count = __SANITIZER_TASK_VM_INFO_COUNT;
int err = task_info(mach_task_self(), TASK_VM_INFO, (int *)&vm_info, &count);
if (err == 0) {
return vm_info.max_address - 1;

View File

@ -13,7 +13,7 @@
#ifndef SANITIZER_PLATFORM_H
#define SANITIZER_PLATFORM_H
#if !defined(__linux__) && !defined(__FreeBSD__) && \
#if !defined(__linux__) && !defined(__FreeBSD__) && !defined(__NetBSD__) && \
!defined(__APPLE__) && !defined(_WIN32)
# error "This operating system is not supported"
#endif
@ -30,6 +30,12 @@
# define SANITIZER_FREEBSD 0
#endif
#if defined(__NetBSD__)
# define SANITIZER_NETBSD 1
#else
# define SANITIZER_NETBSD 0
#endif
#if defined(__APPLE__)
# define SANITIZER_MAC 1
# include <TargetConditionals.h>
@ -79,7 +85,8 @@
# define SANITIZER_ANDROID 0
#endif
#define SANITIZER_POSIX (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC)
#define SANITIZER_POSIX \
(SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || SANITIZER_NETBSD)
#if __LP64__ || defined(_WIN64)
# define SANITIZER_WORDSIZE 64

View File

@ -49,6 +49,12 @@
# define SI_FREEBSD 0
#endif
#if SANITIZER_NETBSD
# define SI_NETBSD 1
#else
# define SI_NETBSD 0
#endif
#if SANITIZER_LINUX
# define SI_LINUX 1
#else
@ -109,9 +115,9 @@
// memmem on Darwin doesn't exist on 10.6
// FIXME: enable memmem on Windows.
#define SANITIZER_INTERCEPT_MEMMEM \
SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7
(SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7)
#define SANITIZER_INTERCEPT_MEMCHR 1
#define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX
#define SANITIZER_INTERCEPT_MEMRCHR (SI_FREEBSD || SI_LINUX || SI_NETBSD)
#define SANITIZER_INTERCEPT_READ SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PREAD SI_NOT_WINDOWS
@ -127,7 +133,8 @@
#define SANITIZER_INTERCEPT_READV SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_WRITEV SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PREADV SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PREADV \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_PWRITEV SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PREADV64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PWRITEV64 SI_LINUX_NOT_ANDROID
@ -142,7 +149,7 @@
#ifndef SANITIZER_INTERCEPT_PRINTF
# define SANITIZER_INTERCEPT_PRINTF SI_NOT_WINDOWS
# define SANITIZER_INTERCEPT_PRINTF_L SI_FREEBSD
# define SANITIZER_INTERCEPT_PRINTF_L (SI_FREEBSD || SI_NETBSD)
# define SANITIZER_INTERCEPT_ISOC99_PRINTF SI_LINUX_NOT_ANDROID
#endif
@ -151,13 +158,14 @@
#define SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_GETPWENT \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETPWENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SETPWENT SI_MAC || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_CLOCK_GETTIME SI_FREEBSD || SI_LINUX
#define SANITIZER_INTERCEPT_GETPWENT_R \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SETPWENT (SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_CLOCK_GETTIME (SI_FREEBSD || SI_NETBSD || SI_LINUX)
#define SANITIZER_INTERCEPT_GETITIMER SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_TIME SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID
@ -168,10 +176,11 @@
#define SANITIZER_INTERCEPT_GETNAMEINFO SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_GETSOCKNAME SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_GETHOSTBYNAME SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R SI_FREEBSD || SI_LINUX
#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETHOSTENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R (SI_FREEBSD || SI_LINUX)
#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R \
(SI_FREEBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_GETHOSTENT_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_GETSOCKOPT SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_ACCEPT SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX_NOT_ANDROID
@ -197,63 +206,67 @@
#define SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_STRTOIMAX SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_MBSTOWCS SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_MBSNRTOWCS SI_MAC || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_MBSNRTOWCS (SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_WCSTOMBS SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_WCSNRTOMBS \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_WCRTOMB \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_TCGETATTR SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_REALPATH SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_CONFSTR \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SCHED_GETAFFINITY SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SCHED_GETPARAM SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_STRERROR SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_STRERROR_R SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_XPG_STRERROR_R SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SCANDIR \
SI_FREEBSD || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SCANDIR64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETGROUPS SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_POLL SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PPOLL SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_WORDEXP \
SI_FREEBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SIGWAIT SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_SIGWAITINFO SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SIGSETOPS \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SIGPENDING SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_SIGPROCMASK SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_BACKTRACE SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_BACKTRACE \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_GETMNTENT SI_LINUX
#define SANITIZER_INTERCEPT_GETMNTENT_R SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_STATFS SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_STATFS \
(SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_STATFS64 \
(SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_STATVFS SI_FREEBSD || SI_LINUX_NOT_ANDROID
((SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_STATVFS \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_STATVFS64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_INITGROUPS SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_ETHER_NTOA_ATON SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_ETHER_HOST \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_ETHER_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SHMCTL \
((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && SANITIZER_WORDSIZE == 64)
(SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_ETHER_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SHMCTL \
(SI_NETBSD || ((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && \
SANITIZER_WORDSIZE == 64)) // NOLINT
#define SANITIZER_INTERCEPT_RANDOM_R SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL \
SI_MAC || SI_LINUX_NOT_ANDROID
(SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
SI_MAC || SI_LINUX_NOT_ANDROID
(SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED SI_NOT_WINDOWS
@ -268,33 +281,36 @@
#define SANITIZER_INTERCEPT_SINCOS SI_LINUX
#define SANITIZER_INTERCEPT_REMQUO SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_LGAMMA SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_LGAMMA_R SI_FREEBSD || SI_LINUX
#define SANITIZER_INTERCEPT_LGAMMA_R (SI_FREEBSD || SI_LINUX)
#define SANITIZER_INTERCEPT_LGAMMAL_R SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_DRAND48_R SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_RAND_R \
SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_ICONV SI_FREEBSD || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_ICONV \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_TIMES SI_NOT_WINDOWS
// FIXME: getline seems to be available on OSX 10.7
#define SANITIZER_INTERCEPT_GETLINE SI_FREEBSD || SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_GETLINE \
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT__EXIT SI_LINUX || SI_FREEBSD || SI_MAC
#define SANITIZER_INTERCEPT__EXIT \
(SI_LINUX || SI_FREEBSD || SI_NETBSD || SI_MAC)
#define SANITIZER_INTERCEPT_PHTREAD_MUTEX SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP \
SI_FREEBSD || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_TLS_GET_ADDR \
SI_FREEBSD || SI_LINUX_NOT_ANDROID
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_LISTXATTR SI_LINUX
#define SANITIZER_INTERCEPT_GETXATTR SI_LINUX
#define SANITIZER_INTERCEPT_GETRESID SI_LINUX
#define SANITIZER_INTERCEPT_GETIFADDRS \
SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
#define SANITIZER_INTERCEPT_IF_INDEXTONAME \
SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
#define SANITIZER_INTERCEPT_CAPGET SI_LINUX_NOT_ANDROID
#if SI_LINUX && defined(__arm__)
#define SANITIZER_INTERCEPT_AEABI_MEM 1
@ -302,55 +318,61 @@
#define SANITIZER_INTERCEPT_AEABI_MEM 0
#endif
#define SANITIZER_INTERCEPT___BZERO SI_MAC
#define SANITIZER_INTERCEPT_FTIME !SI_FREEBSD && SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_FTIME (!SI_FREEBSD && !SI_NETBSD && SI_NOT_WINDOWS)
#define SANITIZER_INTERCEPT_XDR SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_TSEARCH SI_LINUX_NOT_ANDROID || SI_MAC
#define SANITIZER_INTERCEPT_TSEARCH \
(SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD)
#define SANITIZER_INTERCEPT_LIBIO_INTERNALS SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_FOPEN SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_FOPEN64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM (SI_LINUX_NOT_ANDROID || SI_NETBSD)
#define SANITIZER_INTERCEPT_OBSTACK SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_FFLUSH SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_FCLOSE SI_NOT_WINDOWS
#ifndef SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
#define SANITIZER_INTERCEPT_DLOPEN_DLCLOSE \
SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
(SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
#endif
#define SANITIZER_INTERCEPT_GETPASS SI_LINUX_NOT_ANDROID || SI_MAC
#define SANITIZER_INTERCEPT_GETPASS \
(SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD)
#define SANITIZER_INTERCEPT_TIMERFD SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_MLOCKX SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_FOPENCOOKIE SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_SEM SI_LINUX || SI_FREEBSD
#define SANITIZER_INTERCEPT_SEM (SI_LINUX || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_PTHREAD_SETCANCEL SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_MINCORE SI_LINUX
#define SANITIZER_INTERCEPT_MINCORE (SI_LINUX || SI_NETBSD)
#define SANITIZER_INTERCEPT_PROCESS_VM_READV SI_LINUX
#define SANITIZER_INTERCEPT_CTERMID SI_LINUX || SI_MAC || SI_FREEBSD
#define SANITIZER_INTERCEPT_CTERMID_R SI_MAC || SI_FREEBSD
#define SANITIZER_INTERCEPT_CTERMID \
(SI_LINUX || SI_MAC || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_CTERMID_R (SI_MAC || SI_FREEBSD)
#define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX || SI_MAC || SI_WINDOWS
#define SANITIZER_INTERCEPTOR_HOOKS (SI_LINUX || SI_MAC || SI_WINDOWS)
#define SANITIZER_INTERCEPT_RECV_RECVFROM SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_SEND_SENDTO SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
#define SANITIZER_INTERCEPT_STAT (SI_FREEBSD || SI_MAC || SI_ANDROID)
#define SANITIZER_INTERCEPT___XSTAT !SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS
#define SANITIZER_INTERCEPT_STAT \
(SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD)
#define SANITIZER_INTERCEPT___XSTAT \
(!SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS)
#define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
#define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_UTMP SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD
#define SANITIZER_INTERCEPT_UTMPX SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD
#define SANITIZER_INTERCEPT_UTMP (SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD)
#define SANITIZER_INTERCEPT_UTMPX (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD)
#define SANITIZER_INTERCEPT_GETLOADAVG \
SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD
(SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (!SI_FREEBSD && !SI_MAC)
#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC)
#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC)
#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC)
#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO \
(!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC)
#define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC)
#define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
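
Most of this hunk just parenthesizes the macro bodies. Without the parentheses, a definition such as SI_FREEBSD || SI_LINUX is substituted textually into larger #if expressions and picks up the wrong precedence; a minimal reproduction (hypothetical macro names):

#include <cstdio>

#define SI_FREEBSD 0
#define SI_LINUX 1
#define INTERCEPT_FOO_BAD  SI_FREEBSD || SI_LINUX      // unparenthesized body
#define INTERCEPT_FOO_GOOD (SI_FREEBSD || SI_LINUX)
#define WANT_FOO 0

int main() {
#if WANT_FOO && INTERCEPT_FOO_BAD
  // Expands to 0 && 0 || 1, which is (0 && 0) || 1 == 1: wrongly enabled.
  puts("bad form: interceptor enabled even though WANT_FOO is 0");
#endif
#if WANT_FOO && INTERCEPT_FOO_GOOD
  puts("good form: never reached");             // 0 && (0 || 1) == 0
#endif
  return 0;
}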

View File

@ -287,7 +287,7 @@ static int TracerThread(void* argument) {
// Alternate stack for signal handling.
InternalScopedBuffer<char> handler_stack_memory(kHandlerStackSize);
struct sigaltstack handler_stack;
stack_t handler_stack;
internal_memset(&handler_stack, 0, sizeof(handler_stack));
handler_stack.ss_sp = handler_stack_memory.data();
handler_stack.ss_size = kHandlerStackSize;

View File

@ -19,10 +19,11 @@
#include "scudo_tls.h"
#include "scudo_utils.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_quarantine.h"
#include <errno.h>
#include <string.h>
namespace __scudo {
@ -73,7 +74,7 @@ struct ScudoChunk : UnpackedHeader {
// beginning of the user data to the end of the backend allocated chunk.
uptr getUsableSize(UnpackedHeader *Header) {
uptr Size =
getBackendAllocator().GetActuallyAllocatedSize(getAllocBeg(Header),
getBackendAllocator().getActuallyAllocatedSize(getAllocBeg(Header),
Header->FromPrimary);
if (Size == 0)
return 0;
@ -232,7 +233,10 @@ struct QuarantineCallback {
}
Chunk->eraseHeader();
void *Ptr = Chunk->getAllocBeg(&Header);
getBackendAllocator().Deallocate(Cache_, Ptr, Header.FromPrimary);
if (Header.FromPrimary)
getBackendAllocator().deallocatePrimary(Cache_, Ptr);
else
getBackendAllocator().deallocateSecondary(Ptr);
}
// Internal quarantine allocation and deallocation functions. We first check
@ -240,11 +244,11 @@ struct QuarantineCallback {
// TODO(kostyak): figure out the best way to protect the batches.
COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
void *Allocate(uptr Size) {
return getBackendAllocator().Allocate(Cache_, Size, MinAlignment, true);
return getBackendAllocator().allocatePrimary(Cache_, Size);
}
void Deallocate(void *Ptr) {
getBackendAllocator().Deallocate(Cache_, Ptr, true);
getBackendAllocator().deallocatePrimary(Cache_, Ptr);
}
AllocatorCache *Cache_;
@ -277,6 +281,9 @@ struct ScudoAllocator {
ScudoBackendAllocator BackendAllocator;
ScudoQuarantine AllocatorQuarantine;
StaticSpinMutex GlobalPrngMutex;
ScudoPrng GlobalPrng;
// The fallback caches are used when the thread local caches have been
// 'destroyed' on thread tear-down. They are protected by a Mutex as they can
// be accessed by different threads.
@ -303,10 +310,10 @@ struct ScudoAllocator {
// result, the maximum offset will be at most the maximum alignment for the
// last size class minus the header size, in multiples of MinAlignment.
UnpackedHeader Header = {};
uptr MaxPrimaryAlignment = 1 << MostSignificantSetBitIndex(
SizeClassMap::kMaxSize - MinAlignment);
uptr MaxOffset = (MaxPrimaryAlignment - AlignedChunkHeaderSize) >>
MinAlignmentLog;
uptr MaxPrimaryAlignment =
1 << MostSignificantSetBitIndex(SizeClassMap::kMaxSize - MinAlignment);
uptr MaxOffset =
(MaxPrimaryAlignment - AlignedChunkHeaderSize) >> MinAlignmentLog;
Header.Offset = MaxOffset;
if (Header.Offset != MaxOffset) {
dieWithMessage("ERROR: the maximum possible offset doesn't fit in the "
@ -328,13 +335,14 @@ struct ScudoAllocator {
DeleteSizeMismatch = Options.DeleteSizeMismatch;
ZeroContents = Options.ZeroContents;
SetAllocatorMayReturnNull(Options.MayReturnNull);
BackendAllocator.Init(Options.ReleaseToOSIntervalMs);
BackendAllocator.init(Options.ReleaseToOSIntervalMs);
AllocatorQuarantine.Init(
static_cast<uptr>(Options.QuarantineSizeMb) << 20,
static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
BackendAllocator.InitCache(&FallbackAllocatorCache);
GlobalPrng.init();
Cookie = GlobalPrng.getU64();
BackendAllocator.initCache(&FallbackAllocatorCache);
FallbackPrng.init();
Cookie = FallbackPrng.getU64();
}
// Helper function that checks for a valid Scudo chunk. nullptr isn't.
@ -374,28 +382,36 @@ struct ScudoAllocator {
void *Ptr;
u8 Salt;
uptr AllocationSize = FromPrimary ? AlignedSize : NeededSize;
uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment;
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
Salt = getPrng(ThreadContext)->getU8();
Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext),
AllocationSize, AllocationAlignment,
FromPrimary);
ThreadContext->unlock();
uptr AllocSize;
if (FromPrimary) {
AllocSize = AlignedSize;
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
Salt = getPrng(ThreadContext)->getU8();
Ptr = BackendAllocator.allocatePrimary(getAllocatorCache(ThreadContext),
AllocSize);
ThreadContext->unlock();
} else {
SpinMutexLock l(&FallbackMutex);
Salt = FallbackPrng.getU8();
Ptr = BackendAllocator.allocatePrimary(&FallbackAllocatorCache,
AllocSize);
}
} else {
SpinMutexLock l(&FallbackMutex);
Salt = FallbackPrng.getU8();
Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, AllocationSize,
AllocationAlignment, FromPrimary);
{
SpinMutexLock l(&GlobalPrngMutex);
Salt = GlobalPrng.getU8();
}
AllocSize = NeededSize;
Ptr = BackendAllocator.allocateSecondary(AllocSize, Alignment);
}
if (UNLIKELY(!Ptr))
return FailureHandler::OnOOM();
// If requested, we will zero out the entire contents of the returned chunk.
if ((ForceZeroContents || ZeroContents) && FromPrimary)
memset(Ptr, 0,
BackendAllocator.GetActuallyAllocatedSize(Ptr, FromPrimary));
memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize(
Ptr, /*FromPrimary=*/true));
UnpackedHeader Header = {};
uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
@ -409,11 +425,11 @@ struct ScudoAllocator {
uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg;
Header.Offset = Offset >> MinAlignmentLog;
}
CHECK_LE(UserBeg + Size, AllocBeg + AllocationSize);
CHECK_LE(UserBeg + Size, AllocBeg + AllocSize);
Header.State = ChunkAllocated;
Header.AllocType = Type;
if (FromPrimary) {
Header.FromPrimary = FromPrimary;
Header.FromPrimary = 1;
Header.SizeOrUnusedBytes = Size;
} else {
// The secondary fits the allocations to a page, so the amount of unused
@ -424,7 +440,7 @@ struct ScudoAllocator {
if (TrailingBytes)
Header.SizeOrUnusedBytes = PageSize - TrailingBytes;
}
Header.Salt = static_cast<u8>(Salt);
Header.Salt = Salt;
getScudoChunk(UserBeg)->storeHeader(&Header);
void *UserPtr = reinterpret_cast<void *>(UserBeg);
// if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size);
@ -442,15 +458,18 @@ struct ScudoAllocator {
if (BypassQuarantine) {
Chunk->eraseHeader();
void *Ptr = Chunk->getAllocBeg(Header);
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr,
FromPrimary);
ThreadContext->unlock();
if (FromPrimary) {
ScudoThreadContext *ThreadContext = getThreadContextAndLock();
if (LIKELY(ThreadContext)) {
getBackendAllocator().deallocatePrimary(
getAllocatorCache(ThreadContext), Ptr);
ThreadContext->unlock();
} else {
SpinMutexLock Lock(&FallbackMutex);
getBackendAllocator().deallocatePrimary(&FallbackAllocatorCache, Ptr);
}
} else {
SpinMutexLock Lock(&FallbackMutex);
getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr,
FromPrimary);
getBackendAllocator().deallocateSecondary(Ptr);
}
} else {
UnpackedHeader NewHeader = *Header;
@ -580,7 +599,7 @@ struct ScudoAllocator {
void *calloc(uptr NMemB, uptr Size) {
initThreadMaybe();
if (CheckForCallocOverflow(NMemB, Size))
if (UNLIKELY(CheckForCallocOverflow(NMemB, Size)))
return FailureHandler::OnBadRequest();
return allocate(NMemB * Size, MinAlignment, FromMalloc, true);
}
@ -589,13 +608,13 @@ struct ScudoAllocator {
AllocatorCache *Cache = getAllocatorCache(ThreadContext);
AllocatorQuarantine.Drain(getQuarantineCache(ThreadContext),
QuarantineCallback(Cache));
BackendAllocator.DestroyCache(Cache);
BackendAllocator.destroyCache(Cache);
}
uptr getStats(AllocatorStat StatType) {
initThreadMaybe();
uptr stats[AllocatorStatCount];
BackendAllocator.GetStats(stats);
BackendAllocator.getStats(stats);
return stats[StatType];
}
};
@ -611,7 +630,7 @@ static void initScudoInternal(const AllocatorOptions &Options) {
}
void ScudoThreadContext::init() {
getBackendAllocator().InitCache(&Cache);
getBackendAllocator().initCache(&Cache);
Prng.init();
memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
}
@ -621,7 +640,7 @@ void ScudoThreadContext::commitBack() {
}
void *scudoMalloc(uptr Size, AllocType Type) {
return Instance.allocate(Size, MinAlignment, Type);
return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, Type));
}
void scudoFree(void *Ptr, AllocType Type) {
@ -634,54 +653,56 @@ void scudoSizedFree(void *Ptr, uptr Size, AllocType Type) {
void *scudoRealloc(void *Ptr, uptr Size) {
if (!Ptr)
return Instance.allocate(Size, MinAlignment, FromMalloc);
return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, FromMalloc));
if (Size == 0) {
Instance.deallocate(Ptr, 0, FromMalloc);
return nullptr;
}
return Instance.reallocate(Ptr, Size);
return SetErrnoOnNull(Instance.reallocate(Ptr, Size));
}
void *scudoCalloc(uptr NMemB, uptr Size) {
return Instance.calloc(NMemB, Size);
return SetErrnoOnNull(Instance.calloc(NMemB, Size));
}
void *scudoValloc(uptr Size) {
return Instance.allocate(Size, GetPageSizeCached(), FromMemalign);
return SetErrnoOnNull(
Instance.allocate(Size, GetPageSizeCached(), FromMemalign));
}
void *scudoPvalloc(uptr Size) {
uptr PageSize = GetPageSizeCached();
Size = RoundUpTo(Size, PageSize);
if (Size == 0) {
// pvalloc(0) should allocate one page.
Size = PageSize;
}
return Instance.allocate(Size, PageSize, FromMemalign);
// pvalloc(0) should allocate one page.
Size = Size ? RoundUpTo(Size, PageSize) : PageSize;
return SetErrnoOnNull(Instance.allocate(Size, PageSize, FromMemalign));
}
void *scudoMemalign(uptr Alignment, uptr Size) {
if (UNLIKELY(!IsPowerOfTwo(Alignment)))
if (UNLIKELY(!IsPowerOfTwo(Alignment))) {
errno = errno_EINVAL;
return ScudoAllocator::FailureHandler::OnBadRequest();
return Instance.allocate(Size, Alignment, FromMemalign);
}
return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMemalign));
}
int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size) {
if (UNLIKELY(!IsPowerOfTwo(Alignment) || (Alignment % sizeof(void *)) != 0)) {
*MemPtr = ScudoAllocator::FailureHandler::OnBadRequest();
return EINVAL;
if (UNLIKELY(!CheckPosixMemalignAlignment(Alignment))) {
ScudoAllocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
}
*MemPtr = Instance.allocate(Size, Alignment, FromMemalign);
if (!*MemPtr)
return ENOMEM;
void *Ptr = Instance.allocate(Size, Alignment, FromMemalign);
if (UNLIKELY(!Ptr))
return errno_ENOMEM;
*MemPtr = Ptr;
return 0;
}
void *scudoAlignedAlloc(uptr Alignment, uptr Size) {
// Alignment must be a power of 2, Size must be a multiple of Alignment.
if (UNLIKELY(!IsPowerOfTwo(Alignment) || (Size & (Alignment - 1)) != 0))
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(Alignment, Size))) {
errno = errno_EINVAL;
return ScudoAllocator::FailureHandler::OnBadRequest();
return Instance.allocate(Size, Alignment, FromMalloc);
}
return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMalloc));
}
uptr scudoMallocUsableSize(void *Ptr) {

View File

@ -23,41 +23,47 @@ template <class PrimaryAllocator, class AllocatorCache,
class SecondaryAllocator>
class ScudoCombinedAllocator {
public:
void Init(s32 ReleaseToOSIntervalMs) {
void init(s32 ReleaseToOSIntervalMs) {
Primary.Init(ReleaseToOSIntervalMs);
Secondary.Init();
Stats.Init();
}
void *Allocate(AllocatorCache *Cache, uptr Size, uptr Alignment,
bool FromPrimary) {
if (FromPrimary)
return Cache->Allocate(&Primary, Primary.ClassID(Size));
// Primary allocations are always MinAlignment aligned, and as such do not
// require an Alignment parameter.
void *allocatePrimary(AllocatorCache *Cache, uptr Size) {
return Cache->Allocate(&Primary, Primary.ClassID(Size));
}
// Secondary allocations do not require a Cache, but do require an Alignment
// parameter.
void *allocateSecondary(uptr Size, uptr Alignment) {
return Secondary.Allocate(&Stats, Size, Alignment);
}
void Deallocate(AllocatorCache *Cache, void *Ptr, bool FromPrimary) {
if (FromPrimary)
Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr);
else
Secondary.Deallocate(&Stats, Ptr);
void deallocatePrimary(AllocatorCache *Cache, void *Ptr) {
Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr);
}
uptr GetActuallyAllocatedSize(void *Ptr, bool FromPrimary) {
void deallocateSecondary(void *Ptr) {
Secondary.Deallocate(&Stats, Ptr);
}
uptr getActuallyAllocatedSize(void *Ptr, bool FromPrimary) {
if (FromPrimary)
return PrimaryAllocator::ClassIdToSize(Primary.GetSizeClass(Ptr));
return Secondary.GetActuallyAllocatedSize(Ptr);
}
void InitCache(AllocatorCache *Cache) {
void initCache(AllocatorCache *Cache) {
Cache->Init(&Stats);
}
void DestroyCache(AllocatorCache *Cache) {
void destroyCache(AllocatorCache *Cache) {
Cache->Destroy(&Primary, &Stats);
}
void GetStats(AllocatorStatCounters StatType) const {
void getStats(AllocatorStatCounters StatType) const {
Stats.Get(StatType);
}
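
The refactoring replaces the combined Allocate/Deallocate entry points that took a FromPrimary flag with explicit primary and secondary variants, so each call site states which backend it wants. A condensed sketch of the resulting calling pattern (pseudotypes, not the scudo headers themselves):

#include <cstddef>

template <class Backend, class Cache>
void *allocate(Backend &B, Cache *C, size_t Size, size_t Alignment,
               bool FromPrimary) {
  if (FromPrimary)
    return B.allocatePrimary(C, Size);           // cache-backed, MinAlignment-aligned
  return B.allocateSecondary(Size, Alignment);   // no cache, honors the alignment
}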

View File

@ -61,20 +61,13 @@
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
// other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
// the remaining bits hold "acquired" flag (the actual value is thread's
// reused counter);
// if acquired == thr->reused_, then the respective thread has already
// acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
// acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
// need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
// release-store operation by the thread with release_store_tid_ index.
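
Conceptually, acquiring a SyncClock advances the thread's vector clock to the element-wise maximum of the two clocks; the dirty elements and reuse counters described above are fast paths layered on top of that. A plain restatement of the core operation (illustrative, not the TSan data layout):

#include <algorithm>
#include <vector>

using u64 = unsigned long long;

void acquire(std::vector<u64> &thread_clk, const std::vector<u64> &sync_clk) {
  if (thread_clk.size() < sync_clk.size())
    thread_clk.resize(sync_clk.size(), 0);
  for (size_t i = 0; i < sync_clk.size(); i++)
    thread_clk[i] = std::max(thread_clk[i], sync_clk[i]);   // pointwise max
}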
@ -90,21 +83,51 @@
namespace __tsan {
static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}
// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
ClockBlock *cb = ctx->clock_alloc.Map(idx);
atomic_uint32_t *ref = ref_ptr(cb);
u32 v = atomic_load(ref, memory_order_acquire);
for (;;) {
CHECK_GT(v, 0);
if (v == 1)
break;
if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
return;
}
// First level block owns second level blocks, so free them as well.
for (uptr i = 0; i < blocks; i++)
ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
ctx->clock_alloc.Free(c, idx);
}
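
UnrefClockBlock drops one reference and frees the block chain only when the last reference goes away. Stripped of the clock-block details, the shape is the familiar atomic unref pattern (sketch, not the TSan allocator API):

#include <atomic>

template <class FreeFn>
void unref(std::atomic<unsigned> &refcount, FreeFn free_storage) {
  // fetch_sub returns the previous value; 1 means this was the last reference.
  if (refcount.fetch_sub(1, std::memory_order_acq_rel) == 1)
    free_storage();
}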
ThreadClock::ThreadClock(unsigned tid, unsigned reused)
: tid_(tid)
, reused_(reused + 1) { // 0 has special meaning
, reused_(reused + 1) // 0 has special meaning
, cached_idx_()
, cached_size_()
, cached_blocks_() {
CHECK_LT(tid, kMaxTidInClock);
CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
nclk_ = tid_ + 1;
last_acquire_ = 0;
internal_memset(clk_, 0, sizeof(clk_));
clk_[tid_].reused = reused_;
}
void ThreadClock::ResetCached(ClockCache *c) {
if (cached_idx_) {
UnrefClockBlock(c, cached_idx_, cached_blocks_);
cached_idx_ = 0;
cached_size_ = 0;
cached_blocks_ = 0;
}
}
void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(src->size_, kMaxTid);
CPP_STAT_INC(StatClockAcquire);
@ -116,50 +139,46 @@ void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
return;
}
// Check if we've already acquired src after the last release operation on src
bool acquired = false;
if (nclk > tid_) {
if (src->elem(tid_).reused == reused_) {
for (unsigned i = 0; i < kDirtyTids; i++) {
unsigned tid = src->dirty_tids_[i];
if (tid != kInvalidTid) {
u64 epoch = src->elem(tid).epoch;
if (clk_[tid].epoch < epoch) {
clk_[tid].epoch = epoch;
acquired = true;
}
}
for (unsigned i = 0; i < kDirtyTids; i++) {
SyncClock::Dirty dirty = src->dirty_[i];
unsigned tid = dirty.tid;
if (tid != kInvalidTid) {
if (clk_[tid] < dirty.epoch) {
clk_[tid] = dirty.epoch;
acquired = true;
}
if (acquired) {
CPP_STAT_INC(StatClockAcquiredSomething);
last_acquire_ = clk_[tid_].epoch;
}
return;
}
}
// O(N) acquire.
CPP_STAT_INC(StatClockAcquireFull);
nclk_ = max(nclk_, nclk);
for (uptr i = 0; i < nclk; i++) {
u64 epoch = src->elem(i).epoch;
if (clk_[i].epoch < epoch) {
clk_[i].epoch = epoch;
acquired = true;
// Check if we've already acquired src after the last release operation on src
if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
// O(N) acquire.
CPP_STAT_INC(StatClockAcquireFull);
nclk_ = max(nclk_, nclk);
u64 *dst_pos = &clk_[0];
for (ClockElem &src_elem : *src) {
u64 epoch = src_elem.epoch;
if (*dst_pos < epoch) {
*dst_pos = epoch;
acquired = true;
}
dst_pos++;
}
}
// Remember that this thread has acquired this clock.
if (nclk > tid_)
src->elem(tid_).reused = reused_;
// Remember that this thread has acquired this clock.
if (nclk > tid_)
src->elem(tid_).reused = reused_;
}
if (acquired) {
CPP_STAT_INC(StatClockAcquiredSomething);
last_acquire_ = clk_[tid_].epoch;
last_acquire_ = clk_[tid_];
ResetCached(c);
}
}
void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
void ThreadClock::release(ClockCache *c, SyncClock *dst) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(dst->size_, kMaxTid);
@ -179,7 +198,7 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
// since the last release on dst. If so, we need to update
// only dst->elem(tid_).
if (dst->elem(tid_).epoch > last_acquire_) {
UpdateCurrentThread(dst);
UpdateCurrentThread(c, dst);
if (dst->release_store_tid_ != tid_ ||
dst->release_store_reused_ != reused_)
dst->release_store_tid_ = kInvalidTid;
@ -188,23 +207,24 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
// O(N) release.
CPP_STAT_INC(StatClockReleaseFull);
dst->Unshare(c);
// First, remember whether we've acquired dst.
bool acquired = IsAlreadyAcquired(dst);
if (acquired)
CPP_STAT_INC(StatClockReleaseAcquired);
// Update dst->clk_.
for (uptr i = 0; i < nclk_; i++) {
ClockElem &ce = dst->elem(i);
ce.epoch = max(ce.epoch, clk_[i].epoch);
dst->FlushDirty();
uptr i = 0;
for (ClockElem &ce : *dst) {
ce.epoch = max(ce.epoch, clk_[i]);
ce.reused = 0;
i++;
}
// Clear 'acquired' flag in the remaining elements.
if (nclk_ < dst->size_)
CPP_STAT_INC(StatClockReleaseClearTail);
for (uptr i = nclk_; i < dst->size_; i++)
dst->elem(i).reused = 0;
for (unsigned i = 0; i < kDirtyTids; i++)
dst->dirty_tids_[i] = kInvalidTid;
dst->release_store_tid_ = kInvalidTid;
dst->release_store_reused_ = 0;
// If we've acquired dst, remember this fact,
@ -213,11 +233,37 @@ void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
dst->elem(tid_).reused = reused_;
}
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
DCHECK_LE(nclk_, kMaxTid);
DCHECK_LE(dst->size_, kMaxTid);
CPP_STAT_INC(StatClockStore);
if (dst->size_ == 0 && cached_idx_ != 0) {
// Reuse the cached clock.
// Note: we could reuse/cache the cached clock in more cases:
// we could update the existing clock and cache it, or replace it with the
// currently cached clock and release the old one. And for a shared
// existing clock, we could replace it with the currently cached;
// or unshare, update and cache. But, for simplicity, we currently reuse
// cached clock only when the target clock is empty.
dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
dst->tab_idx_ = cached_idx_;
dst->size_ = cached_size_;
dst->blocks_ = cached_blocks_;
CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
// The cached clock is shared (immutable),
// so this is where we store the current clock.
dst->dirty_[0].tid = tid_;
dst->dirty_[0].epoch = clk_[tid_];
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
// Remember that we don't need to acquire it in the future.
dst->elem(tid_).reused = reused_;
// Grab a reference.
atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
return;
}
// Check if we need to resize dst.
if (dst->size_ < nclk_)
dst->Resize(c, nclk_);
@ -226,32 +272,41 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
dst->release_store_reused_ == reused_ &&
dst->elem(tid_).epoch > last_acquire_) {
CPP_STAT_INC(StatClockStoreFast);
UpdateCurrentThread(dst);
UpdateCurrentThread(c, dst);
return;
}
// O(N) release-store.
CPP_STAT_INC(StatClockStoreFull);
for (uptr i = 0; i < nclk_; i++) {
ClockElem &ce = dst->elem(i);
ce.epoch = clk_[i].epoch;
dst->Unshare(c);
// Note: dst can be larger than this ThreadClock.
// This is fine since clk_ beyond size is all zeros.
uptr i = 0;
for (ClockElem &ce : *dst) {
ce.epoch = clk_[i];
ce.reused = 0;
i++;
}
// Clear the tail of dst->clk_.
if (nclk_ < dst->size_) {
for (uptr i = nclk_; i < dst->size_; i++) {
ClockElem &ce = dst->elem(i);
ce.epoch = 0;
ce.reused = 0;
}
CPP_STAT_INC(StatClockStoreTail);
}
for (unsigned i = 0; i < kDirtyTids; i++)
dst->dirty_tids_[i] = kInvalidTid;
for (uptr i = 0; i < kDirtyTids; i++)
dst->dirty_[i].tid = kInvalidTid;
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
// Remember that we don't need to acquire it in the future.
dst->elem(tid_).reused = reused_;
// If the resulting clock is cachable, cache it for future release operations.
// The clock is always cachable if we released to an empty sync object.
if (cached_idx_ == 0 && dst->Cachable()) {
// Grab a reference to the ClockBlock.
atomic_uint32_t *ref = ref_ptr(dst->tab_);
if (atomic_load(ref, memory_order_acquire) == 1)
atomic_store_relaxed(ref, 2);
else
atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
cached_idx_ = dst->tab_idx_;
cached_size_ = dst->size_;
cached_blocks_ = dst->blocks_;
}
}
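The fast path above hands out a shared, reference-counted clock and records only the releasing thread's epoch in a dirty slot. The following standalone sketch shows that snapshot-plus-overlay pattern with assumed names and std::atomic; it is not the TSan code.
```cpp
#include <atomic>
#include <cstdint>
#include <vector>

// Immutable-once-shared snapshot of a clock, with an intrusive refcount.
struct Snapshot {
  std::atomic<uint32_t> ref{1};
  std::vector<uint64_t> clock;
};

// A sync object: a shared snapshot plus a tiny mutable "dirty" overlay.
struct SyncPoint {
  Snapshot *snap = nullptr;
  int dirty_tid = -1;        // overrides snap->clock[dirty_tid] when >= 0
  uint64_t dirty_epoch = 0;
};

// Release-store that reuses a cached snapshot instead of copying the clock.
void ReleaseStoreCached(SyncPoint *dst, Snapshot *cached, int tid,
                        uint64_t epoch) {
  cached->ref.fetch_add(1, std::memory_order_relaxed);  // dst now shares it
  dst->snap = cached;
  dst->dirty_tid = tid;      // the only per-release mutation: the overlay
  dst->dirty_epoch = epoch;
}
```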
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
@ -261,37 +316,36 @@ void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
}
// Updates only single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
// Update the threads time, but preserve 'acquired' flag.
dst->elem(tid_).epoch = clk_[tid_].epoch;
for (unsigned i = 0; i < kDirtyTids; i++) {
if (dst->dirty_tids_[i] == tid_) {
SyncClock::Dirty *dirty = &dst->dirty_[i];
const unsigned tid = dirty->tid;
if (tid == tid_ || tid == kInvalidTid) {
CPP_STAT_INC(StatClockReleaseFast);
return;
}
if (dst->dirty_tids_[i] == kInvalidTid) {
CPP_STAT_INC(StatClockReleaseFast);
dst->dirty_tids_[i] = tid_;
dirty->tid = tid_;
dirty->epoch = clk_[tid_];
return;
}
}
// Reset all 'acquired' flags, O(N).
// We are going to touch dst elements, so we need to unshare it.
dst->Unshare(c);
CPP_STAT_INC(StatClockReleaseSlow);
dst->elem(tid_).epoch = clk_[tid_];
for (uptr i = 0; i < dst->size_; i++)
dst->elem(i).reused = 0;
for (unsigned i = 0; i < kDirtyTids; i++)
dst->dirty_tids_[i] = kInvalidTid;
dst->FlushDirty();
}
// Checks whether the current threads has already acquired src.
// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
if (src->elem(tid_).reused != reused_)
return false;
for (unsigned i = 0; i < kDirtyTids; i++) {
unsigned tid = src->dirty_tids_[i];
if (tid != kInvalidTid) {
if (clk_[tid].epoch < src->elem(tid).epoch)
SyncClock::Dirty dirty = src->dirty_[i];
if (dirty.tid != kInvalidTid) {
if (clk_[dirty.tid] < dirty.epoch)
return false;
}
}
@ -302,22 +356,19 @@ bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
DCHECK_LT(tid, kMaxTid);
DCHECK_GE(v, clk_[tid].epoch);
clk_[tid].epoch = v;
DCHECK_GE(v, clk_[tid]);
clk_[tid] = v;
if (nclk_ <= tid)
nclk_ = tid + 1;
last_acquire_ = clk_[tid_].epoch;
last_acquire_ = clk_[tid_];
ResetCached(c);
}
void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
printf("clock=[");
for (uptr i = 0; i < nclk_; i++)
printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
printf("] reused=[");
for (uptr i = 0; i < nclk_; i++)
printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
printf("] tid=%u/%u last_acq=%llu",
tid_, reused_, last_acquire_);
printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}
SyncClock::SyncClock() {
@ -327,22 +378,14 @@ SyncClock::SyncClock() {
SyncClock::~SyncClock() {
// Reset must be called before dtor.
CHECK_EQ(size_, 0);
CHECK_EQ(blocks_, 0);
CHECK_EQ(tab_, 0);
CHECK_EQ(tab_idx_, 0);
}
void SyncClock::Reset(ClockCache *c) {
if (size_ == 0) {
// nothing
} else if (size_ <= ClockBlock::kClockCount) {
// One-level table.
ctx->clock_alloc.Free(c, tab_idx_);
} else {
// Two-level table.
for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
ctx->clock_alloc.Free(c, tab_idx_);
}
if (size_)
UnrefClockBlock(c, tab_idx_, blocks_);
ResetImpl();
}
@ -350,66 +393,171 @@ void SyncClock::ResetImpl() {
tab_ = 0;
tab_idx_ = 0;
size_ = 0;
blocks_ = 0;
release_store_tid_ = kInvalidTid;
release_store_reused_ = 0;
for (uptr i = 0; i < kDirtyTids; i++)
dirty_tids_[i] = kInvalidTid;
dirty_[i].tid = kInvalidTid;
}
void SyncClock::Resize(ClockCache *c, uptr nclk) {
CPP_STAT_INC(StatClockReleaseResize);
if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
RoundUpTo(size_, ClockBlock::kClockCount)) {
// Growing within the same block.
Unshare(c);
if (nclk <= capacity()) {
// Memory is already allocated, just increase the size.
size_ = nclk;
return;
}
if (nclk <= ClockBlock::kClockCount) {
if (size_ == 0) {
// Grow from 0 to one-level table.
CHECK_EQ(size_, 0);
CHECK_EQ(blocks_, 0);
CHECK_EQ(tab_, 0);
CHECK_EQ(tab_idx_, 0);
size_ = nclk;
tab_idx_ = ctx->clock_alloc.Alloc(c);
tab_ = ctx->clock_alloc.Map(tab_idx_);
internal_memset(tab_, 0, sizeof(*tab_));
return;
atomic_store_relaxed(ref_ptr(tab_), 1);
size_ = 1;
} else if (size_ > blocks_ * ClockBlock::kClockCount) {
u32 idx = ctx->clock_alloc.Alloc(c);
ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
uptr top = size_ - blocks_ * ClockBlock::kClockCount;
CHECK_LT(top, ClockBlock::kClockCount);
const uptr move = top * sizeof(tab_->clock[0]);
internal_memcpy(&new_cb->clock[0], tab_->clock, move);
internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
internal_memset(tab_->clock, 0, move);
append_block(idx);
}
// Growing two-level table.
if (size_ == 0) {
// Allocate first level table.
tab_idx_ = ctx->clock_alloc.Alloc(c);
tab_ = ctx->clock_alloc.Map(tab_idx_);
internal_memset(tab_, 0, sizeof(*tab_));
} else if (size_ <= ClockBlock::kClockCount) {
// Transform one-level table to two-level table.
u32 old = tab_idx_;
tab_idx_ = ctx->clock_alloc.Alloc(c);
tab_ = ctx->clock_alloc.Map(tab_idx_);
internal_memset(tab_, 0, sizeof(*tab_));
tab_->table[0] = old;
}
// At this point we have first level table allocated.
// At this point we have first level table allocated and all clock elements
// are evacuated from it to a second level block.
// Add second level tables as necessary.
for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
i < nclk; i += ClockBlock::kClockCount) {
while (nclk > capacity()) {
u32 idx = ctx->clock_alloc.Alloc(c);
ClockBlock *cb = ctx->clock_alloc.Map(idx);
internal_memset(cb, 0, sizeof(*cb));
CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
tab_->table[i/ClockBlock::kClockCount] = idx;
append_block(idx);
}
size_ = nclk;
}
ClockElem &SyncClock::elem(unsigned tid) const {
// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
for (unsigned i = 0; i < kDirtyTids; i++) {
Dirty *dirty = &dirty_[i];
if (dirty->tid != kInvalidTid) {
CHECK_LT(dirty->tid, size_);
elem(dirty->tid).epoch = dirty->epoch;
dirty->tid = kInvalidTid;
}
}
}
bool SyncClock::IsShared() const {
if (size_ == 0)
return false;
atomic_uint32_t *ref = ref_ptr(tab_);
u32 v = atomic_load(ref, memory_order_acquire);
CHECK_GT(v, 0);
return v > 1;
}
// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
if (!IsShared())
return;
// First, copy current state into old.
SyncClock old;
old.tab_ = tab_;
old.tab_idx_ = tab_idx_;
old.size_ = size_;
old.blocks_ = blocks_;
old.release_store_tid_ = release_store_tid_;
old.release_store_reused_ = release_store_reused_;
for (unsigned i = 0; i < kDirtyTids; i++)
old.dirty_[i] = dirty_[i];
// Then, clear current object.
ResetImpl();
// Allocate brand new clock in the current object.
Resize(c, old.size_);
// Now copy state back into this object.
Iter old_iter(&old);
for (ClockElem &ce : *this) {
ce = *old_iter;
++old_iter;
}
release_store_tid_ = old.release_store_tid_;
release_store_reused_ = old.release_store_reused_;
for (unsigned i = 0; i < kDirtyTids; i++)
dirty_[i] = old.dirty_[i];
// Drop reference to old and delete if necessary.
old.Reset(c);
}
// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
if (size_ == 0)
return false;
for (unsigned i = 0; i < kDirtyTids; i++) {
if (dirty_[i].tid != kInvalidTid)
return false;
}
return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}
// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one time accesses, vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
DCHECK_LT(tid, size_);
if (size_ <= ClockBlock::kClockCount)
const uptr block = tid / ClockBlock::kClockCount;
DCHECK_LE(block, blocks_);
tid %= ClockBlock::kClockCount;
if (block == blocks_)
return tab_->clock[tid];
u32 idx = tab_->table[tid / ClockBlock::kClockCount];
u32 idx = get_block(block);
ClockBlock *cb = ctx->clock_alloc.Map(idx);
return cb->clock[tid % ClockBlock::kClockCount];
return cb->clock[tid];
}
ALWAYS_INLINE uptr SyncClock::capacity() const {
if (size_ == 0)
return 0;
uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
// How many clock elements we can fit into the first level block.
// +1 for ref counter.
uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
return blocks_ * ClockBlock::kClockCount + top;
}
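The arithmetic in capacity() follows from the 512-byte block size, 8-byte clock elements and 4-byte table entries (sizes assumed from the surrounding definitions). A toy re-derivation:
```cpp
#include <cstdint>

// Toy re-derivation of capacity(); constants assumed, not taken verbatim.
constexpr uint64_t RoundUp(uint64_t x, uint64_t m) { return (x + m - 1) / m * m; }
constexpr uint64_t ToyCapacity(uint64_t blocks) {
  const uint64_t kClockCount = 64;  // 512 / sizeof(ClockElem)
  const uint64_t ratio = 2;         // sizeof(ClockElem) / sizeof(u32)
  return blocks * kClockCount +
         (kClockCount - RoundUp(blocks + 1, ratio) / ratio);
}
static_assert(ToyCapacity(0) == 63,  "ref counter overlaps the last clock slot");
static_assert(ToyCapacity(1) == 127, "ref + one block index fit in one slot");
static_assert(ToyCapacity(2) == 190, "ref + two block indices need two slots");
```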
ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
DCHECK(size_);
DCHECK_LT(bi, blocks_);
return tab_->table[ClockBlock::kBlockIdx - bi];
}
ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
uptr bi = blocks_++;
CHECK_EQ(get_block(bi), 0);
tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}
// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
for (unsigned i = 0; i < kDirtyTids; i++) {
Dirty dirty = dirty_[i];
if (dirty.tid == tid)
return dirty.epoch;
}
return elem(tid).epoch;
}
// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
return elem(tid).epoch;
}
void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
@ -419,8 +567,32 @@ void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
printf("] reused=[");
for (uptr i = 0; i < size_; i++)
printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
release_store_tid_, release_store_reused_,
dirty_tids_[0], dirty_tids_[1]);
dirty_[0].tid, dirty_[0].epoch,
dirty_[1].tid, dirty_[1].epoch);
}
void SyncClock::Iter::Next() {
// Finished with the current block, move on to the next one.
block_++;
if (block_ < parent_->blocks_) {
// Iterate over the next second level block.
u32 idx = parent_->get_block(block_);
ClockBlock *cb = ctx->clock_alloc.Map(idx);
pos_ = &cb->clock[0];
end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
ClockBlock::kClockCount);
return;
}
if (block_ == parent_->blocks_ &&
parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
// Iterate over elements in the first level block.
pos_ = &parent_->tab_->clock[0];
end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
ClockBlock::kClockCount);
return;
}
parent_ = nullptr; // denotes end
}
} // namespace __tsan

View File

@ -18,25 +18,6 @@
namespace __tsan {
struct ClockElem {
u64 epoch : kClkBits;
u64 reused : 64 - kClkBits;
};
struct ClockBlock {
static const uptr kSize = 512;
static const uptr kTableSize = kSize / sizeof(u32);
static const uptr kClockCount = kSize / sizeof(ClockElem);
union {
u32 table[kTableSize];
ClockElem clock[kClockCount];
};
ClockBlock() {
}
};
typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;
@ -46,69 +27,117 @@ class SyncClock {
SyncClock();
~SyncClock();
uptr size() const {
return size_;
}
uptr size() const;
u64 get(unsigned tid) const {
return elem(tid).epoch;
}
// These are used only in tests.
u64 get(unsigned tid) const;
u64 get_clean(unsigned tid) const;
void Resize(ClockCache *c, uptr nclk);
void Reset(ClockCache *c);
void DebugDump(int(*printf)(const char *s, ...));
// Clock element iterator.
// Note: it iterates only over the table without regard to dirty entries.
class Iter {
public:
explicit Iter(SyncClock* parent);
Iter& operator++();
bool operator!=(const Iter& other);
ClockElem &operator*();
private:
SyncClock *parent_;
// [pos_, end_) is the current continuous range of clock elements.
ClockElem *pos_;
ClockElem *end_;
int block_; // Index of the current second level block.
NOINLINE void Next();
};
Iter begin();
Iter end();
private:
friend struct ThreadClock;
friend class ThreadClock;
friend class Iter;
static const uptr kDirtyTids = 2;
struct Dirty {
u64 epoch : kClkBits;
u64 tid : 64 - kClkBits; // kInvalidTid if not active
};
unsigned release_store_tid_;
unsigned release_store_reused_;
unsigned dirty_tids_[kDirtyTids];
// tab_ contains indirect pointer to a 512b block using DenseSlabAlloc.
// If size_ <= 64, then tab_ points to an array with 64 ClockElem's.
// Otherwise, tab_ points to an array with 128 u32 elements,
Dirty dirty_[kDirtyTids];
// If size_ is 0, tab_ is nullptr.
// If size_ <= 64 (kClockCount), tab_ contains a pointer to an array with
// 64 ClockElem's (ClockBlock::clock).
// Otherwise, tab_ points to an array with up to 127 u32 elements,
// each pointing to the second-level 512b block with 64 ClockElem's.
// Unused space in the first level ClockBlock is used to store additional
// clock elements.
// The last u32 element in the first level ClockBlock is always used as
// reference counter.
//
// See the following scheme for details.
// All memory blocks are 512 bytes (allocated from ClockAlloc).
// Clock (clk) elements are 64 bits.
// Idx and ref are 32 bits.
//
// tab_
// |
// \/
// +----------------------------------------------------+
// | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
// +----------------------------------------------------+
// | |
// | \/
// | +----------------+
// | | clk0 ... clk63 |
// | +----------------+
// \/
// +------------------+
// | clk64 ... clk127 |
// +------------------+
//
// Note: dirty entries, if active, always override what's stored in the clock.
ClockBlock *tab_;
u32 tab_idx_;
u32 size_;
u16 size_;
u16 blocks_; // Number of second level blocks.
void Unshare(ClockCache *c);
bool IsShared() const;
bool Cachable() const;
void ResetImpl();
void FlushDirty();
uptr capacity() const;
u32 get_block(uptr bi) const;
void append_block(u32 idx);
ClockElem &elem(unsigned tid) const;
};
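A toy model of the two-level indexing pictured in the comment above; the constants and names are illustrative only.
```cpp
#include <cstdint>

constexpr uint64_t kToyClockCount = 64;  // clock elements per 512-byte block
constexpr uint64_t ToyBlock(uint64_t tid)  { return tid / kToyClockCount; }
constexpr uint64_t ToyOffset(uint64_t tid) { return tid % kToyClockCount; }

// clk130 -> block 2, offset 2. If blocks_ == 2, block number 2 equals blocks_,
// so the element is stored directly in the first-level block (the clk128,
// clk129, ... row of the diagram); otherwise it lives in second-level block 2.
static_assert(ToyBlock(130) == 2 && ToyOffset(130) == 2, "matches the diagram");
```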
// The clock that lives in threads.
struct ThreadClock {
class ThreadClock {
public:
typedef DenseSlabAllocCache Cache;
explicit ThreadClock(unsigned tid, unsigned reused = 0);
u64 get(unsigned tid) const {
DCHECK_LT(tid, kMaxTidInClock);
return clk_[tid].epoch;
}
u64 get(unsigned tid) const;
void set(ClockCache *c, unsigned tid, u64 v);
void set(u64 v);
void tick();
uptr size() const;
void set(u64 v) {
DCHECK_GE(v, clk_[tid_].epoch);
clk_[tid_].epoch = v;
}
void tick() {
clk_[tid_].epoch++;
}
uptr size() const {
return nclk_;
}
void acquire(ClockCache *c, const SyncClock *src);
void release(ClockCache *c, SyncClock *dst) const;
void acquire(ClockCache *c, SyncClock *src);
void release(ClockCache *c, SyncClock *dst);
void acq_rel(ClockCache *c, SyncClock *dst);
void ReleaseStore(ClockCache *c, SyncClock *dst) const;
void ReleaseStore(ClockCache *c, SyncClock *dst);
void ResetCached(ClockCache *c);
void DebugReset();
@ -116,16 +145,82 @@ struct ThreadClock {
private:
static const uptr kDirtyTids = SyncClock::kDirtyTids;
// Index of the thread associated with the clock ("current thread").
const unsigned tid_;
const unsigned reused_;
const unsigned reused_; // tid_ reuse count.
// Current thread time when it acquired something from other threads.
u64 last_acquire_;
// Cached SyncClock (without dirty entries and release_store_tid_).
// We reuse it for subsequent store-release operations without intervening
// acquire operations. Since it is shared (and thus constant), clock value
// for the current thread is then stored in dirty entries in the SyncClock.
// We hold a reference to the table while it is cached here.
u32 cached_idx_;
u16 cached_size_;
u16 cached_blocks_;
// Number of active elements in the clk_ table (the rest is zeros).
uptr nclk_;
ClockElem clk_[kMaxTidInClock];
u64 clk_[kMaxTidInClock]; // Fixed size vector clock.
bool IsAlreadyAcquired(const SyncClock *src) const;
void UpdateCurrentThread(SyncClock *dst) const;
void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
DCHECK_LT(tid, kMaxTidInClock);
return clk_[tid];
}
ALWAYS_INLINE void ThreadClock::set(u64 v) {
DCHECK_GE(v, clk_[tid_]);
clk_[tid_] = v;
}
ALWAYS_INLINE void ThreadClock::tick() {
clk_[tid_]++;
}
ALWAYS_INLINE uptr ThreadClock::size() const {
return nclk_;
}
ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
return Iter(this);
}
ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
return Iter(nullptr);
}
ALWAYS_INLINE uptr SyncClock::size() const {
return size_;
}
ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
: parent_(parent)
, pos_(nullptr)
, end_(nullptr)
, block_(-1) {
if (parent)
Next();
}
ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
pos_++;
if (UNLIKELY(pos_ >= end_))
Next();
return *this;
}
ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
return parent_ != other.parent_;
}
ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
return *pos_;
}
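A hedged usage sketch of the iterator (assumes tsan_clock.h is included and a live SyncClock; it mirrors the O(N) loops in the .cc file and, like them, ignores dirty entries):
```cpp
// Walks both the first-level and second-level blocks transparently.
void ZeroReuseFlags(__tsan::SyncClock *sc) {
  for (__tsan::ClockElem &ce : *sc)
    ce.reused = 0;
}
```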
} // namespace __tsan
#endif // TSAN_CLOCK_H

View File

@ -38,15 +38,40 @@
namespace __tsan {
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
struct ClockElem {
u64 epoch : kClkBits;
u64 reused : 64 - kClkBits; // tid reuse count
};
struct ClockBlock {
static const uptr kSize = 512;
static const uptr kTableSize = kSize / sizeof(u32);
static const uptr kClockCount = kSize / sizeof(ClockElem);
static const uptr kRefIdx = kTableSize - 1;
static const uptr kBlockIdx = kTableSize - 2;
union {
u32 table[kTableSize];
ClockElem clock[kClockCount];
};
ClockBlock() {
}
};
const int kTidBits = 13;
const unsigned kMaxTid = 1 << kTidBits;
// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
// occupied by reference counter, so total number of elements we can store
// in SyncClock is kClockCount * (kTableSize - 1).
const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
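Spelled out, the constants above work out as follows (assuming 8-byte ClockElem and 4-byte table entries; these asserts are editorial illustration, not part of the patch):
```cpp
static_assert(ClockBlock::kClockCount == 512 / 8, "64 clock elements per block");
static_assert(ClockBlock::kTableSize == 512 / 4, "128 u32 slots per block");
static_assert(kMaxTid == 8192 - 64, "(1 << 13) minus one block's worth of slots");
```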
#if !SANITIZER_GO
const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit.
#else
const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory.
#endif
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
const uptr kShadowStackSize = 64 * 1024;
// Count of shadow values in a shadow cell.
@ -74,7 +99,7 @@ const bool kCollectHistory = false;
const bool kCollectHistory = true;
#endif
const unsigned kInvalidTid = (unsigned)-1;
const u16 kInvalidTid = kMaxTid + 1;
// The following "build consistency" machinery ensures that all source files
// are built in the same configuration. Inconsistent builds lead to

View File

@ -10,6 +10,7 @@
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_placement_new.h"

View File

@ -286,7 +286,7 @@ void InitializePlatform() {
int ExtractResolvFDs(void *state, int *fds, int nfd) {
#if SANITIZER_LINUX && !SANITIZER_ANDROID
int cnt = 0;
__res_state *statp = (__res_state*)state;
struct __res_state *statp = (struct __res_state*)state;
for (int i = 0; i < MAXNS && cnt < nfd; i++) {
if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1)
fds[cnt++] = statp->_u._ext.nssocks[i];

View File

@ -573,14 +573,19 @@ static void handlePointerOverflowImpl(PointerOverflowData *Data,
ScopedReport R(Opts, Loc, ET);
if ((sptr(Base) >= 0) == (sptr(Result) >= 0))
Diag(Loc, DL_Error, "unsigned pointer index expression result is %0, "
"preceding its base %1")
<< (void *)Result << (void *)Base;
else
if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) {
if (Base > Result)
Diag(Loc, DL_Error, "addition of unsigned offset to %0 overflowed to %1")
<< (void *)Base << (void *)Result;
else
Diag(Loc, DL_Error,
"subtraction of unsigned offset from %0 overflowed to %1")
<< (void *)Base << (void *)Result;
} else {
Diag(Loc, DL_Error,
"pointer index expression with base %0 overflowed to %1")
<< (void *)Base << (void *)Result;
}
}
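A hypothetical reproducer for the new "addition of unsigned offset ... overflowed" wording (assumes clang++ -fsanitize=pointer-overflow; the variable names and build flags are illustrative):
```cpp
#include <cstdint>

int main() {
  char buf[4];
  uintptr_t big = UINTPTR_MAX;   // huge unsigned offset
  char *p = buf + big;           // pointer value wraps -> sanitizer report
  return p == buf;               // keep p observable
}
```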
void __ubsan::__ubsan_handle_pointer_overflow(PointerOverflowData *Data,

View File

@ -229,8 +229,9 @@
# define _LIBCPP_SHORT_WCHAR 1
// Both MinGW and native MSVC provide a "MSVC"-like environment
# define _LIBCPP_MSVCRT_LIKE
// If mingw not explicitly detected, assume using MS C runtime only.
# ifndef __MINGW32__
// If mingw not explicitly detected, assume using MS C runtime only if
// a MS compatibility version is specified.
# if defined(_MSC_VER) && !defined(__MINGW32__)
# define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library
# endif
# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__))
@ -625,7 +626,6 @@ namespace std {
#define _LIBCPP_HIDDEN
#define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
#define _LIBCPP_TEMPLATE_VIS
#define _LIBCPP_FUNC_VIS_ONLY
#define _LIBCPP_ENUM_VIS
#if defined(_LIBCPP_COMPILER_MSVC)
@ -684,10 +684,6 @@ namespace std {
# endif
#endif
#ifndef _LIBCPP_FUNC_VIS_ONLY
# define _LIBCPP_FUNC_VIS_ONLY _LIBCPP_FUNC_VIS
#endif
#ifndef _LIBCPP_EXTERN_VIS
# define _LIBCPP_EXTERN_VIS
#endif
@ -925,8 +921,10 @@ template <unsigned> struct __static_assert_check {};
# define _LIBCPP_STD_VER 11
# elif __cplusplus <= 201402L
# define _LIBCPP_STD_VER 14
# elif __cplusplus <= 201703L
# define _LIBCPP_STD_VER 17
# else
# define _LIBCPP_STD_VER 16 // current year, or date of c++17 ratification
# define _LIBCPP_STD_VER 18 // current year, or date of c++2a ratification
# endif
#endif // _LIBCPP_STD_VER

View File

@ -4234,10 +4234,6 @@ sort(__wrap_iter<_Tp*> __first, __wrap_iter<_Tp*> __last, _Compare __comp)
_VSTD::sort<_Tp*, _Comp_ref>(__first.base(), __last.base(), __comp);
}
#ifdef _LIBCPP_MSVC
#pragma warning( push )
#pragma warning( disable: 4231)
#endif // _LIBCPP_MSVC
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&))
@ -4271,9 +4267,6 @@ _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long double>&, long double*>(long double*, long double*, __less<long double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS unsigned __sort5<__less<long double>&, long double*>(long double*, long double*, long double*, long double*, long double*, __less<long double>&))
#ifdef _LIBCPP_MSVC
#pragma warning( pop )
#endif // _LIBCPP_MSVC
// lower_bound

View File

@ -578,14 +578,7 @@ __basic_string_common<__b>::__throw_out_of_range() const
_VSTD::__throw_out_of_range("basic_string");
}
#ifdef _LIBCPP_MSVC
#pragma warning( push )
#pragma warning( disable: 4231 )
#endif // _LIBCPP_MSVC
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __basic_string_common<true>)
#ifdef _LIBCPP_MSVC
#pragma warning( pop )
#endif // _LIBCPP_MSVC
#ifdef _LIBCPP_NO_EXCEPTIONS
template <class _Iter>
@ -4006,7 +3999,7 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator*
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string<char>)
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string<wchar_t>)
_LIBCPP_EXTERN_TEMPLATE(string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
#if _LIBCPP_STD_VER > 11
// Literal suffixes for basic_string [basic.string.literals]

View File

@ -310,14 +310,7 @@ __vector_base_common<__b>::__throw_out_of_range() const
_VSTD::__throw_out_of_range("vector");
}
#ifdef _LIBCPP_MSVC
#pragma warning( push )
#pragma warning( disable: 4231 )
#endif // _LIBCPP_MSVC
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __vector_base_common<true>)
#ifdef _LIBCPP_MSVC
#pragma warning( pop )
#endif // _LIBCPP_MSVC
template <class _Tp, class _Allocator>
class __vector_base

View File

@ -29,9 +29,9 @@ namespace llvm {
/// DominanceFrontierBase - Common base class for computing forward and inverse
/// dominance frontiers for a function.
///
template <class BlockT>
template <class BlockT, bool IsPostDom>
class DominanceFrontierBase {
public:
public:
typedef std::set<BlockT *> DomSetType; // Dom set for a bb
typedef std::map<BlockT *, DomSetType> DomSetMapType; // Dom set map
@ -40,10 +40,10 @@ class DominanceFrontierBase {
DomSetMapType Frontiers;
std::vector<BlockT *> Roots;
const bool IsPostDominators;
static constexpr bool IsPostDominators = IsPostDom;
public:
DominanceFrontierBase(bool isPostDom) : IsPostDominators(isPostDom) {}
public:
DominanceFrontierBase() {}
/// getRoots - Return the root blocks of the current CFG. This may include
/// multiple blocks if we are computing post dominators. For forward
@ -96,7 +96,7 @@ class DominanceFrontierBase {
/// compare - Return true if the other dominance frontier base matches
/// this dominance frontier base. Otherwise return false.
bool compare(DominanceFrontierBase<BlockT> &Other) const;
bool compare(DominanceFrontierBase &Other) const;
/// print - Convert to human readable form
///
@ -113,22 +113,21 @@ class DominanceFrontierBase {
/// used to compute a forward dominator frontiers.
///
template <class BlockT>
class ForwardDominanceFrontierBase : public DominanceFrontierBase<BlockT> {
private:
class ForwardDominanceFrontierBase
: public DominanceFrontierBase<BlockT, false> {
private:
typedef GraphTraits<BlockT *> BlockTraits;
public:
typedef DominatorTreeBase<BlockT> DomTreeT;
typedef DomTreeNodeBase<BlockT> DomTreeNodeT;
typedef typename DominanceFrontierBase<BlockT>::DomSetType DomSetType;
typedef DomTreeBase<BlockT> DomTreeT;
typedef DomTreeNodeBase<BlockT> DomTreeNodeT;
typedef typename DominanceFrontierBase<BlockT, false>::DomSetType DomSetType;
ForwardDominanceFrontierBase() : DominanceFrontierBase<BlockT>(false) {}
void analyze(DomTreeT &DT) {
this->Roots = DT.getRoots();
assert(this->Roots.size() == 1 &&
"Only one entry block for forward domfronts!");
calculate(DT, DT[this->Roots[0]]);
void analyze(DomTreeT &DT) {
this->Roots = DT.getRoots();
assert(this->Roots.size() == 1 &&
"Only one entry block for forward domfronts!");
calculate(DT, DT[this->Roots[0]]);
}
const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node);
@ -136,15 +135,16 @@ class ForwardDominanceFrontierBase : public DominanceFrontierBase<BlockT> {
class DominanceFrontier : public ForwardDominanceFrontierBase<BasicBlock> {
public:
typedef DominatorTreeBase<BasicBlock> DomTreeT;
typedef DomTreeNodeBase<BasicBlock> DomTreeNodeT;
typedef DominanceFrontierBase<BasicBlock>::DomSetType DomSetType;
typedef DominanceFrontierBase<BasicBlock>::iterator iterator;
typedef DominanceFrontierBase<BasicBlock>::const_iterator const_iterator;
typedef DomTreeBase<BasicBlock> DomTreeT;
typedef DomTreeNodeBase<BasicBlock> DomTreeNodeT;
typedef DominanceFrontierBase<BasicBlock, false>::DomSetType DomSetType;
typedef DominanceFrontierBase<BasicBlock, false>::iterator iterator;
typedef DominanceFrontierBase<BasicBlock, false>::const_iterator
const_iterator;
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
};
class DominanceFrontierWrapperPass : public FunctionPass {
@ -168,7 +168,8 @@ class DominanceFrontierWrapperPass : public FunctionPass {
void dump() const;
};
extern template class DominanceFrontierBase<BasicBlock>;
extern template class DominanceFrontierBase<BasicBlock, false>;
extern template class DominanceFrontierBase<BasicBlock, true>;
extern template class ForwardDominanceFrontierBase<BasicBlock>;
/// \brief Analysis pass which computes a \c DominanceFrontier.

View File

@ -39,33 +39,33 @@ class DFCalculateWorkObject {
const DomTreeNodeT *parentNode;
};
template <class BlockT>
void DominanceFrontierBase<BlockT>::removeBlock(BlockT *BB) {
template <class BlockT, bool IsPostDom>
void DominanceFrontierBase<BlockT, IsPostDom>::removeBlock(BlockT *BB) {
assert(find(BB) != end() && "Block is not in DominanceFrontier!");
for (iterator I = begin(), E = end(); I != E; ++I)
I->second.erase(BB);
Frontiers.erase(BB);
}
template <class BlockT>
void DominanceFrontierBase<BlockT>::addToFrontier(iterator I,
BlockT *Node) {
template <class BlockT, bool IsPostDom>
void DominanceFrontierBase<BlockT, IsPostDom>::addToFrontier(iterator I,
BlockT *Node) {
assert(I != end() && "BB is not in DominanceFrontier!");
assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB");
I->second.erase(Node);
}
template <class BlockT>
void DominanceFrontierBase<BlockT>::removeFromFrontier(iterator I,
BlockT *Node) {
template <class BlockT, bool IsPostDom>
void DominanceFrontierBase<BlockT, IsPostDom>::removeFromFrontier(
iterator I, BlockT *Node) {
assert(I != end() && "BB is not in DominanceFrontier!");
assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB");
I->second.erase(Node);
}
template <class BlockT>
bool DominanceFrontierBase<BlockT>::compareDomSet(DomSetType &DS1,
const DomSetType &DS2) const {
template <class BlockT, bool IsPostDom>
bool DominanceFrontierBase<BlockT, IsPostDom>::compareDomSet(
DomSetType &DS1, const DomSetType &DS2) const {
std::set<BlockT *> tmpSet;
for (BlockT *BB : DS2)
tmpSet.insert(BB);
@ -88,9 +88,9 @@ bool DominanceFrontierBase<BlockT>::compareDomSet(DomSetType &DS1,
return false;
}
template <class BlockT>
bool DominanceFrontierBase<BlockT>::compare(
DominanceFrontierBase<BlockT> &Other) const {
template <class BlockT, bool IsPostDom>
bool DominanceFrontierBase<BlockT, IsPostDom>::compare(
DominanceFrontierBase<BlockT, IsPostDom> &Other) const {
DomSetMapType tmpFrontiers;
for (typename DomSetMapType::const_iterator I = Other.begin(),
E = Other.end();
@ -118,8 +118,8 @@ bool DominanceFrontierBase<BlockT>::compare(
return false;
}
template <class BlockT>
void DominanceFrontierBase<BlockT>::print(raw_ostream &OS) const {
template <class BlockT, bool IsPostDom>
void DominanceFrontierBase<BlockT, IsPostDom>::print(raw_ostream &OS) const {
for (const_iterator I = begin(), E = end(); I != E; ++I) {
OS << " DomFrontier for BB ";
if (I->first)
@ -142,8 +142,8 @@ void DominanceFrontierBase<BlockT>::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
template <class BlockT>
void DominanceFrontierBase<BlockT>::dump() const {
template <class BlockT, bool IsPostDom>
void DominanceFrontierBase<BlockT, IsPostDom>::dump() const {
print(dbgs());
}
#endif

View File

@ -42,11 +42,11 @@ namespace llvm {
/// By default, liveness is not used to prune the IDF computation.
/// The template parameters should be either BasicBlock* or Inverse<BasicBlock
/// *>, depending on if you want the forward or reverse IDF.
template <class NodeTy>
template <class NodeTy, bool IsPostDom>
class IDFCalculator {
public:
IDFCalculator(DominatorTreeBase<BasicBlock> &DT) : DT(DT), useLiveIn(false) {}
public:
IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
: DT(DT), useLiveIn(false) {}
/// \brief Give the IDF calculator the set of blocks in which the value is
/// defined. This is equivalent to the set of starting blocks it should be
@ -84,12 +84,12 @@ class IDFCalculator {
void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks);
private:
DominatorTreeBase<BasicBlock> &DT;
bool useLiveIn;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
DominatorTreeBase<BasicBlock, IsPostDom> &DT;
bool useLiveIn;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
};
typedef IDFCalculator<BasicBlock *> ForwardIDFCalculator;
typedef IDFCalculator<Inverse<BasicBlock *>> ReverseIDFCalculator;
typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator;
typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator;
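A hedged usage sketch of the forward variant, the classic SSA phi-placement step; the wrapper function and its name are illustrative, and only the IDFCalculator calls come from this header:
```cpp
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Blocks needing a phi for a value = iterated dominance frontier of the
// blocks that define it.
static void ComputePhiBlocks(DominatorTree &DT,
                             const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                             SmallVectorImpl<BasicBlock *> &PHIBlocks) {
  ForwardIDFCalculator IDF(DT);
  IDF.setDefiningBlocks(DefBlocks);
  IDF.calculate(PHIBlocks);
}
```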
}
#endif

View File

@ -43,6 +43,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@ -908,7 +909,7 @@ class LazyCallGraph {
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
LazyCallGraph(Module &M);
LazyCallGraph(Module &M, TargetLibraryInfo &TLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
@ -966,6 +967,22 @@ class LazyCallGraph {
return insertInto(F, N);
}
/// Get the sequence of known and defined library functions.
///
/// These functions, because they are known to LLVM, can have calls
/// introduced out of thin air from arbitrary IR.
ArrayRef<Function *> getLibFunctions() const {
return LibFunctions.getArrayRef();
}
/// Test whether a function is a known and defined library function tracked by
/// the call graph.
///
/// Because these functions are known to LLVM they are specially modeled in
/// the call graph and even when all IR-level references have been removed
/// remain active and reachable.
bool isLibFunction(Function &F) const { return LibFunctions.count(&F); }
///@{
/// \name Pre-SCC Mutation API
///
@ -1100,6 +1117,11 @@ class LazyCallGraph {
/// These are all of the RefSCCs which have no children.
SmallVector<RefSCC *, 4> LeafRefSCCs;
/// Defined functions that are also known library functions which the
/// optimizer can reason about and therefore might introduce calls to out of
/// thin air.
SmallSetVector<Function *, 4> LibFunctions;
/// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
@ -1216,8 +1238,8 @@ class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> {
///
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &) {
return LazyCallGraph(M);
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M));
}
};

View File

@ -56,7 +56,8 @@ class Loop;
class MDNode;
class PHINode;
class raw_ostream;
template<class N> class DominatorTreeBase;
template <class N, bool IsPostDom>
class DominatorTreeBase;
template<class N, class M> class LoopInfoBase;
template<class N, class M> class LoopBase;
@ -663,12 +664,12 @@ class LoopInfoBase {
}
/// Create the loop forest using a stable algorithm.
void analyze(const DominatorTreeBase<BlockT> &DomTree);
void analyze(const DominatorTreeBase<BlockT, false> &DomTree);
// Debugging
void print(raw_ostream &OS) const;
void verify(const DominatorTreeBase<BlockT> &DomTree) const;
void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
};
// Implementation in LoopInfoImpl.h
@ -683,7 +684,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
LoopInfo(const LoopInfo &) = delete;
public:
LoopInfo() {}
explicit LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree);
explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
LoopInfo &operator=(LoopInfo &&RHS) {

View File

@ -340,10 +340,10 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
/// Discover a subloop with the specified backedges such that: All blocks within
/// this loop are mapped to this loop or a subloop. And all subloops within this
/// loop have their parent loop set to this loop or a subloop.
template<class BlockT, class LoopT>
static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT*> Backedges,
LoopInfoBase<BlockT, LoopT> *LI,
const DominatorTreeBase<BlockT> &DomTree) {
template <class BlockT, class LoopT>
static void discoverAndMapSubloop(
LoopT *L, ArrayRef<BlockT *> Backedges, LoopInfoBase<BlockT, LoopT> *LI,
const DomTreeBase<BlockT> &DomTree) {
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
unsigned NumBlocks = 0;
@ -462,10 +462,9 @@ void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) {
///
/// The Block vectors are inclusive, so step 3 requires loop-depth number of
/// insertions per block.
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::
analyze(const DominatorTreeBase<BlockT> &DomTree) {
template <class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::analyze(
const DomTreeBase<BlockT> &DomTree) {
// Postorder traversal of the dominator tree.
const DomTreeNodeBase<BlockT> *DomRoot = DomTree.getRootNode();
for (auto DomNode : post_order(DomRoot)) {
@ -607,7 +606,7 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL,
template <class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::verify(
const DominatorTreeBase<BlockT> &DomTree) const {
const DomTreeBase<BlockT> &DomTree) const {
DenseSet<const LoopT*> Loops;
for (iterator I = begin(), E = end(); I != E; ++I) {
assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");

View File

@ -22,10 +22,8 @@ namespace llvm {
/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to
/// compute the post-dominator tree.
///
struct PostDominatorTree : public DominatorTreeBase<BasicBlock> {
typedef DominatorTreeBase<BasicBlock> Base;
PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {}
struct PostDominatorTree : public PostDomTreeBase<BasicBlock> {
typedef PostDomTreeBase<BasicBlock> Base;
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,

View File

@ -237,17 +237,15 @@ struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> {
};
/// This class represents an assumption that two SCEV expressions are equal,
/// and this can be checked at run-time. We assume that the left hand side is
/// a SCEVUnknown and the right hand side a constant.
/// and this can be checked at run-time.
class SCEVEqualPredicate final : public SCEVPredicate {
/// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a
/// constant.
const SCEVUnknown *LHS;
const SCEVConstant *RHS;
/// We assume that LHS == RHS.
const SCEV *LHS;
const SCEV *RHS;
public:
SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS,
const SCEVConstant *RHS);
SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS,
const SCEV *RHS);
/// Implementation of the SCEVPredicate interface
bool implies(const SCEVPredicate *N) const override;
@ -256,10 +254,10 @@ class SCEVEqualPredicate final : public SCEVPredicate {
const SCEV *getExpr() const override;
/// Returns the left hand side of the equality.
const SCEVUnknown *getLHS() const { return LHS; }
const SCEV *getLHS() const { return LHS; }
/// Returns the right hand side of the equality.
const SCEVConstant *getRHS() const { return RHS; }
const SCEV *getRHS() const { return RHS; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEVPredicate *P) {
@ -1241,6 +1239,14 @@ class ScalarEvolution {
SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
return getAddRecExpr(NewOp, L, Flags);
}
/// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some
/// Predicates. If successful return these <AddRecExpr, Predicates>;
/// The function is intended to be called from PSCEV (the caller will decide
/// whether to actually add the predicates and carry out the rewrites).
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI);
/// Returns an expression for a GEP
///
/// \p GEP The GEP. The indices contained in the GEP itself are ignored,
@ -1675,8 +1681,7 @@ class ScalarEvolution {
return F.getParent()->getDataLayout();
}
const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS,
const SCEVConstant *RHS);
const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
const SCEVPredicate *
getWrapPredicate(const SCEVAddRecExpr *AR,
@ -1692,6 +1697,19 @@ class ScalarEvolution {
SmallPtrSetImpl<const SCEVPredicate *> &Preds);
private:
/// Similar to createAddRecFromPHI, but with the additional flexibility of
/// suggesting runtime overflow checks in case casts are encountered.
/// If successful, the analysis records that for this loop, \p SymbolicPHI,
/// which is the UnknownSCEV currently representing the PHI, can be rewritten
/// into an AddRec, assuming some predicates; The function then returns the
/// AddRec and the predicates as a pair, and caches this pair in
/// PredicatedSCEVRewrites.
/// If the analysis is not successful, a mapping from the \p SymbolicPHI to
/// itself (with no predicates) is recorded, and a nullptr with an empty
/// predicates vector is returned as a pair.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);
/// Compute the backedge taken count knowing the interval difference, the
/// stride and presence of the equality in the comparison.
const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
@ -1722,6 +1740,12 @@ class ScalarEvolution {
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
/// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression
/// they can be rewritten into under certain predicates.
DenseMap<std::pair<const SCEVUnknown *, const Loop *>,
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
PredicatedSCEVRewrites;
/// The head of a linked list of all SCEVUnknown values that have been
/// allocated. This is used by releaseMemory to locate them all and call
/// their destructors.

View File

@ -155,6 +155,13 @@ class TargetTransformInfo {
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const;
/// \brief Estimate the cost of an EXT operation when lowered.
///
/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to
/// the EXT operation.
int getExtCost(const Instruction *I, const Value *Src) const;
/// \brief Estimate the cost of a function call when lowered.
///
/// The contract for this is the same as \c getOperationCost except that it
@ -849,6 +856,7 @@ class TargetTransformInfo::Concept {
virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;
virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
virtual int getCallCost(const Function *F, int NumArgs) = 0;
virtual int getCallCost(const Function *F,
@ -1022,6 +1030,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ArrayRef<const Value *> Operands) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
int getExtCost(const Instruction *I, const Value *Src) override {
return Impl.getExtCost(I, Src);
}
int getCallCost(FunctionType *FTy, int NumArgs) override {
return Impl.getCallCost(FTy, NumArgs);
}

View File

@ -120,6 +120,10 @@ class TargetTransformInfoImplBase {
return SI.getNumCases();
}
int getExtCost(const Instruction *I, const Value *Src) {
return TTI::TCC_Basic;
}
unsigned getCallCost(FunctionType *FTy, int NumArgs) {
assert(FTy && "FunctionType must be provided to this routine.");
@ -728,6 +732,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
// nop on most sane targets.
if (isa<CmpInst>(CI->getOperand(0)))
return TTI::TCC_Free;
if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
return static_cast<T *>(this)->getExtCost(CI, Operands.back());
}
return static_cast<T *>(this)->getOperationCost(

View File

@ -155,6 +155,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return BaseT::getGEPCost(PointeeType, Ptr, Operands);
}
int getExtCost(const Instruction *I, const Value *Src) {
if (getTLI()->isExtFree(I))
return TargetTransformInfo::TCC_Free;
if (isa<ZExtInst>(I) || isa<SExtInst>(I))
if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
if (getTLI()->isExtLoad(LI, I, DL))
return TargetTransformInfo::TCC_Free;
return TargetTransformInfo::TCC_Basic;
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments) {
return BaseT::getIntrinsicCost(IID, RetTy, Arguments);

View File

@ -23,27 +23,24 @@ class MachineDominanceFrontier : public MachineFunctionPass {
ForwardDominanceFrontierBase<MachineBasicBlock> Base;
public:
using DomTreeT = DominatorTreeBase<MachineBasicBlock>;
using DomTreeNodeT = DomTreeNodeBase<MachineBasicBlock>;
using DomSetType = DominanceFrontierBase<MachineBasicBlock>::DomSetType;
using iterator = DominanceFrontierBase<MachineBasicBlock>::iterator;
using const_iterator =
DominanceFrontierBase<MachineBasicBlock>::const_iterator;
using DomTreeT = DomTreeBase<MachineBasicBlock>;
using DomTreeNodeT = DomTreeNodeBase<MachineBasicBlock>;
using DomSetType = DominanceFrontierBase<MachineBasicBlock, false>::DomSetType;
using iterator = DominanceFrontierBase<MachineBasicBlock, false>::iterator;
using const_iterator =
DominanceFrontierBase<MachineBasicBlock, false>::const_iterator;
MachineDominanceFrontier(const MachineDominanceFrontier &) = delete;
MachineDominanceFrontier &
operator=(const MachineDominanceFrontier &) = delete;
MachineDominanceFrontier(const MachineDominanceFrontier &) = delete;
MachineDominanceFrontier &operator=(const MachineDominanceFrontier &) = delete;
static char ID;
static char ID;
MachineDominanceFrontier();
MachineDominanceFrontier();
DominanceFrontierBase<MachineBasicBlock> &getBase() {
return Base;
}
DominanceFrontierBase<MachineBasicBlock, false> &getBase() { return Base; }
inline const std::vector<MachineBasicBlock*> &getRoots() const {
return Base.getRoots();
inline const std::vector<MachineBasicBlock *> &getRoots() const {
return Base.getRoots();
}
MachineBasicBlock *getRoot() const {
@ -98,7 +95,7 @@ class MachineDominanceFrontier : public MachineFunctionPass {
return Base.compareDomSet(DS1, DS2);
}
bool compare(DominanceFrontierBase<MachineBasicBlock> &Other) const {
bool compare(DominanceFrontierBase<MachineBasicBlock, false> &Other) const {
return Base.compare(Other);
}

View File

@ -28,13 +28,15 @@
namespace llvm {
template<>
inline void DominatorTreeBase<MachineBasicBlock>::addRoot(MachineBasicBlock* MBB) {
template <>
inline void DominatorTreeBase<MachineBasicBlock, false>::addRoot(
MachineBasicBlock *MBB) {
this->Roots.push_back(MBB);
}
extern template class DomTreeNodeBase<MachineBasicBlock>;
extern template class DominatorTreeBase<MachineBasicBlock>;
extern template class DominatorTreeBase<MachineBasicBlock, false>; // DomTree
extern template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTree
using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
@ -65,7 +67,7 @@ class MachineDominatorTree : public MachineFunctionPass {
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
/// The DominatorTreeBase that is used to compute a normal dominator tree
std::unique_ptr<DominatorTreeBase<MachineBasicBlock>> DT;
std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT;
/// \brief Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
@ -79,9 +81,8 @@ class MachineDominatorTree : public MachineFunctionPass {
MachineDominatorTree();
DominatorTreeBase<MachineBasicBlock> &getBase() {
if (!DT)
DT.reset(new DominatorTreeBase<MachineBasicBlock>(false));
DomTreeBase<MachineBasicBlock> &getBase() {
if (!DT) DT.reset(new DomTreeBase<MachineBasicBlock>());
applySplitCriticalEdges();
return *DT;
}

View File

@ -26,7 +26,7 @@ namespace llvm {
///
struct MachinePostDominatorTree : public MachineFunctionPass {
private:
DominatorTreeBase<MachineBasicBlock> *DT;
PostDomTreeBase<MachineBasicBlock> *DT;
public:
static char ID;

View File

@ -17,7 +17,6 @@
namespace llvm {
namespace codeview {
class TypeCollection;
class TypeServerHandler;
class TypeVisitorCallbacks;
enum VisitorDataSource {
@ -31,11 +30,9 @@ enum VisitorDataSource {
Error visitTypeRecord(CVType &Record, TypeIndex Index,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent,
TypeServerHandler *TS = nullptr);
VisitorDataSource Source = VDS_BytesPresent);
Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent,
TypeServerHandler *TS = nullptr);
VisitorDataSource Source = VDS_BytesPresent);
Error visitMemberRecord(CVMemberRecord Record, TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent);
@ -46,12 +43,9 @@ Error visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
TypeVisitorCallbacks &Callbacks);
Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent,
TypeServerHandler *TS = nullptr);
Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks,
TypeServerHandler *TS = nullptr);
Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks,
TypeServerHandler *TS = nullptr);
VisitorDataSource Source = VDS_BytesPresent);
Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks);
Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks);
} // end namespace codeview
} // end namespace llvm

View File

@ -84,7 +84,7 @@ class CodeViewRecordIO {
Error mapEncodedInteger(uint64_t &Value);
Error mapEncodedInteger(APSInt &Value);
Error mapStringZ(StringRef &Value);
Error mapGuid(StringRef &Guid);
Error mapGuid(GUID &Guid);
Error mapStringZVectorZ(std::vector<StringRef> &Value);

View File

@ -12,6 +12,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
@ -31,7 +32,7 @@ class GuidAdapter final : public FormatAdapter<ArrayRef<uint8_t>> {
explicit GuidAdapter(ArrayRef<uint8_t> Guid);
explicit GuidAdapter(StringRef Guid);
void format(raw_ostream &Stream, StringRef Style) override ;
void format(raw_ostream &Stream, StringRef Style) override;
};
} // end namespace detail
@ -60,6 +61,13 @@ template <> struct format_provider<codeview::TypeIndex> {
}
};
template <> struct format_provider<codeview::GUID> {
static void format(const codeview::GUID &V, llvm::raw_ostream &Stream,
StringRef Style) {
Stream << V;
}
};
} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_FORMATTERS_H

View File

@ -0,0 +1,55 @@
//===- GUID.h ---------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_CODEVIEW_GUID_H
#define LLVM_DEBUGINFO_CODEVIEW_GUID_H
#include <cstdint>
#include <cstring>
namespace llvm {
class raw_ostream;
namespace codeview {
/// This represents the 'GUID' type from windows.h.
struct GUID {
uint8_t Guid[16];
};
inline bool operator==(const GUID &LHS, const GUID &RHS) {
return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid));
}
inline bool operator<(const GUID &LHS, const GUID &RHS) {
return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) < 0;
}
inline bool operator<=(const GUID &LHS, const GUID &RHS) {
return ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid)) <= 0;
}
inline bool operator>(const GUID &LHS, const GUID &RHS) {
return !(LHS <= RHS);
}
inline bool operator>=(const GUID &LHS, const GUID &RHS) {
return !(LHS < RHS);
}
inline bool operator!=(const GUID &LHS, const GUID &RHS) {
return !(LHS == RHS);
}
raw_ostream &operator<<(raw_ostream &OS, const GUID &Guid);
} // namespace codeview
} // namespace llvm
#endif
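A small self-contained sketch of how the new value type behaves; the comparison operators above make it usable directly as an ordered-container key:

  #include "llvm/DebugInfo/CodeView/GUID.h"
  #include <set>

  static bool guidSeenBefore(const llvm::codeview::GUID &G) {
    // An ordered set works because operator< is defined via memcmp above.
    static std::set<llvm::codeview::GUID> Seen;
    return !Seen.insert(G).second;
  }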

View File

@ -848,7 +848,7 @@ class BuildInfoSym : public SymbolRecord {
: SymbolRecord(SymbolRecordKind::BuildInfoSym),
RecordOffset(RecordOffset) {}
uint32_t BuildId;
TypeIndex BuildId;
uint32_t RecordOffset;
};

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Endian.h"
@ -539,15 +540,17 @@ class TypeServer2Record : public TypeRecord {
public:
TypeServer2Record() = default;
explicit TypeServer2Record(TypeRecordKind Kind) : TypeRecord(Kind) {}
TypeServer2Record(StringRef Guid, uint32_t Age, StringRef Name)
: TypeRecord(TypeRecordKind::TypeServer2), Guid(Guid), Age(Age),
Name(Name) {}
TypeServer2Record(StringRef GuidStr, uint32_t Age, StringRef Name)
: TypeRecord(TypeRecordKind::TypeServer2), Age(Age), Name(Name) {
assert(GuidStr.size() == 16 && "guid isn't 16 bytes");
::memcpy(Guid.Guid, GuidStr.data(), 16);
}
StringRef getGuid() const { return Guid; }
const GUID &getGuid() const { return Guid; }
uint32_t getAge() const { return Age; }
StringRef getName() const { return Name; }
StringRef Guid;
GUID Guid;
uint32_t Age;
StringRef Name;
};

View File

@ -1,38 +0,0 @@
//===- TypeServerHandler.h --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H
#include "llvm/Support/Error.h"
namespace llvm {
namespace codeview {
class TypeServer2Record;
class TypeVisitorCallbacks;
class TypeServerHandler {
public:
virtual ~TypeServerHandler() = default;
/// Handle a TypeServer record. If the implementation returns true
/// the record will not be processed by the top-level visitor. If
/// it returns false, it will be processed. If it returns an Error,
/// then the top-level visitor will fail.
virtual Expected<bool> handle(TypeServer2Record &TS,
TypeVisitorCallbacks &Callbacks) {
return false;
}
};
} // end namespace codeview
} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERVERHANDLER_H

View File

@ -19,7 +19,6 @@ namespace llvm {
namespace codeview {
class TypeIndex;
class TypeServerHandler;
class TypeTableBuilder;
/// \brief Merge one set of type records into another. This method assumes
@ -31,16 +30,13 @@ class TypeTableBuilder;
/// type stream, that contains the index of the corresponding type record
/// in the destination stream.
///
/// \param Handler (optional) If non-null, an interface that gets invoked
/// to handle type server records.
///
/// \param Types The collection of types to merge in.
///
/// \returns Error::success() if the operation succeeded, otherwise an
/// appropriate error code.
Error mergeTypeRecords(TypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
TypeServerHandler *Handler, const CVTypeArray &Types);
const CVTypeArray &Types);
/// \brief Merge one set of id records into another. This method assumes
/// that all records are id records, and there are no Type records present.
@ -65,7 +61,7 @@ Error mergeTypeRecords(TypeTableBuilder &Dest,
/// appropriate error code.
Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &Ids);
const CVTypeArray &Ids);
/// \brief Merge a unified set of type and id records, splitting them into
/// separate output streams.
@ -78,9 +74,6 @@ Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
/// id stream, that contains the index of the corresponding id record
/// in the destination stream.
///
/// \param Handler (optional) If non-null, an interface that gets invoked
/// to handle type server records.
///
/// \param IdsAndTypes The collection of id records to merge in.
///
/// \returns Error::success() if the operation succeeded, otherwise an
@ -88,8 +81,7 @@ Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds,
TypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest,
TypeServerHandler *Handler,
const CVTypeArray &IdsAndTypes);
const CVTypeArray &IdsAndTypes);
} // end namespace codeview
} // end namespace llvm
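A hedged sketch of the handler-free call; DestTypes (a TypeTableBuilder) and SourceTypes (a CVTypeArray) are assumed to exist:

  llvm::SmallVector<llvm::codeview::TypeIndex, 128> SourceToDest;
  if (llvm::Error Err =
          llvm::codeview::mergeTypeRecords(DestTypes, SourceToDest, SourceTypes))
    return Err; // on success, SourceToDest holds the source-to-destination index map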

View File

@ -238,6 +238,34 @@ class DWARFUnit {
uint8_t getUnitType() const { return UnitType; }
static bool isValidUnitType(uint8_t UnitType) {
return UnitType == dwarf::DW_UT_compile || UnitType == dwarf::DW_UT_type ||
UnitType == dwarf::DW_UT_partial ||
UnitType == dwarf::DW_UT_skeleton ||
UnitType == dwarf::DW_UT_split_compile ||
UnitType == dwarf::DW_UT_split_type;
}
/// \brief Return the number of bytes for the header of a unit of
/// UnitType type.
///
  /// This function must be called with a valid unit type, which in
  /// DWARF5 is defined as one of the following six types.
static uint32_t getDWARF5HeaderSize(uint8_t UnitType) {
switch (UnitType) {
case dwarf::DW_UT_compile:
case dwarf::DW_UT_partial:
return 12;
case dwarf::DW_UT_skeleton:
case dwarf::DW_UT_split_compile:
return 20;
case dwarf::DW_UT_type:
case dwarf::DW_UT_split_type:
return 24;
}
llvm_unreachable("Invalid UnitType.");
}
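As an illustration (not part of the patch), the helper lets a parser compute the fixed DWARF v5 header size once the unit type byte has been read:

  // Sketch: UnitType was just read from the unit header.
  uint8_t UnitType = llvm::dwarf::DW_UT_split_type;
  uint32_t HdrSize = llvm::DWARFUnit::getDWARF5HeaderSize(UnitType); // 24 bytes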
uint64_t getBaseAddress() const { return BaseAddr; }
void setBaseAddress(uint64_t base_addr) {

View File

@ -21,6 +21,7 @@ class DWARFContext;
class DWARFDie;
class DWARFUnit;
class DWARFAcceleratorTable;
class DWARFDataExtractor;
/// A class that verifies DWARF debug information given a DWARF Context.
class DWARFVerifier {
@ -30,10 +31,35 @@ class DWARFVerifier {
/// can verify each reference points to a valid DIE and not an offset that
  /// lies between two valid DIEs.
std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
uint32_t NumDebugInfoErrors = 0;
uint32_t NumDebugLineErrors = 0;
uint32_t NumAppleNamesErrors = 0;
/// Verifies the header of a unit in the .debug_info section.
///
/// This function currently checks for:
/// - Unit is in 32-bit DWARF format. The function can be modified to
/// support 64-bit format.
/// - The DWARF version is valid
/// - The unit type is valid (if unit is in version >=5)
/// - The unit doesn't extend beyond .debug_info section
/// - The address size is valid
/// - The offset in the .debug_abbrev section is valid
///
/// \param DebugInfoData The .debug_info section data
  /// \param Offset A reference to the offset of the start of the unit. The offset will
/// be updated to point to the next unit in .debug_info
/// \param UnitIndex The index of the unit to be verified
/// \param UnitType A reference to the type of the unit
/// \param isUnitDWARF64 A reference to a flag that shows whether the unit is
/// in 64-bit format.
///
/// \returns true if the header is verified successfully, false otherwise.
bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
bool verifyUnitContents(DWARFUnit Unit);
/// Verifies the attribute's DWARF attribute and its value.
///
/// This function currently checks for:
@ -42,7 +68,11 @@ class DWARFVerifier {
///
/// \param Die The DWARF DIE that owns the attribute value
/// \param AttrValue The DWARF attribute value to check
void verifyDebugInfoAttribute(const DWARFDie &Die, DWARFAttribute &AttrValue);
///
  /// \returns NumErrors The number of errors that occurred during verification of
/// attributes' values in a .debug_info section unit
unsigned verifyDebugInfoAttribute(const DWARFDie &Die,
DWARFAttribute &AttrValue);
/// Verifies the attribute's DWARF form.
///
@ -53,7 +83,10 @@ class DWARFVerifier {
///
/// \param Die The DWARF DIE that owns the attribute value
/// \param AttrValue The DWARF attribute value to check
void verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
///
  /// \returns NumErrors The number of errors that occurred during verification of
/// attributes' forms in a .debug_info section unit
unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
/// Verifies the all valid references that were found when iterating through
/// all of the DIE attributes.
@ -62,7 +95,10 @@ class DWARFVerifier {
/// offset matches. This helps to ensure if a DWARF link phase moved things
/// around, that it doesn't create invalid references by failing to relocate
/// CU relative and absolute references.
void verifyDebugInfoReferences();
///
  /// \returns NumErrors The number of errors that occurred during verification of
/// references for the .debug_info section
unsigned verifyDebugInfoReferences();
  /// Verify the DW_AT_stmt_list encoding and value and ensure that no two
  /// compile units have the same DW_AT_stmt_list value.
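A hedged sketch of how the unsigned return values introduced above might be accumulated; the attribute traversal is hypothetical and only meant to show the counting pattern:

  // Sketch only: Die is a DWARFDie being checked.
  unsigned NumUnitErrors = 0;
  for (auto AttrValue : Die.attributes()) { // hypothetical traversal of the DIE
    NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
    NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
  }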

View File

@ -106,7 +106,7 @@ class DIARawSymbol : public IPDBRawSymbol {
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
PDB_SymType getSymTag() const override;
PDB_UniqueId getGuid() const override;
codeview::GUID getGuid() const override;
int32_t getOffset() const override;
int32_t getThisAdjust() const override;
int32_t getVirtualBasePointerOffset() const override;

View File

@ -19,6 +19,7 @@ namespace pdb {
enum class generic_error_code {
invalid_path = 1,
dia_sdk_not_present,
type_server_not_found,
unspecified,
};

View File

@ -118,7 +118,7 @@ class IPDBRawSymbol {
virtual uint32_t getVirtualTableShapeId() const = 0;
virtual PDB_DataKind getDataKind() const = 0;
virtual PDB_SymType getSymTag() const = 0;
virtual PDB_UniqueId getGuid() const = 0;
virtual codeview::GUID getGuid() const = 0;
virtual int32_t getOffset() const = 0;
virtual int32_t getThisAdjust() const = 0;
virtual int32_t getVirtualBasePointerOffset() const = 0;

View File

@ -23,13 +23,6 @@
break;
namespace llvm {
template <> struct format_provider<pdb::PDB_UniqueId> {
static void format(const pdb::PDB_UniqueId &V, llvm::raw_ostream &Stream,
StringRef Style) {
codeview::fmt_guid(V.Guid).format(Stream, Style);
}
};
template <> struct format_provider<pdb::PdbRaw_ImplVer> {
static void format(const pdb::PdbRaw_ImplVer &V, llvm::raw_ostream &Stream,
StringRef Style) {

View File

@ -12,6 +12,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
@ -39,7 +40,7 @@ class InfoStream {
PdbRaw_ImplVer getVersion() const;
uint32_t getSignature() const;
uint32_t getAge() const;
PDB_UniqueId getGuid() const;
codeview::GUID getGuid() const;
uint32_t getNamedStreamMapByteSize() const;
PdbRaw_Features getFeatures() const;
@ -71,7 +72,7 @@ class InfoStream {
// Due to the aforementioned limitations with `Signature`, this is a new
// signature present on VC70 and higher PDBs which is guaranteed to be
// universally unique.
PDB_UniqueId Guid;
codeview::GUID Guid;
BinarySubstreamRef SubNamedStreams;

View File

@ -37,7 +37,7 @@ class InfoStreamBuilder {
void setVersion(PdbRaw_ImplVer V);
void setSignature(uint32_t S);
void setAge(uint32_t A);
void setGuid(PDB_UniqueId G);
void setGuid(codeview::GUID G);
void addFeature(PdbRaw_FeatureSig Sig);
uint32_t finalize();
@ -54,7 +54,7 @@ class InfoStreamBuilder {
PdbRaw_ImplVer Ver;
uint32_t Sig;
uint32_t Age;
PDB_UniqueId Guid;
codeview::GUID Guid;
NamedStreamMap &NamedStreams;
};

View File

@ -27,7 +27,7 @@ class NativeExeSymbol : public NativeRawSymbol {
uint32_t getAge() const override;
std::string getSymbolsFileName() const override;
PDB_UniqueId getGuid() const override;
codeview::GUID getGuid() const override;
bool hasCTypes() const override;
bool hasPrivateSymbols() const override;

View File

@ -111,7 +111,7 @@ class NativeRawSymbol : public IPDBRawSymbol {
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
PDB_SymType getSymTag() const override;
PDB_UniqueId getGuid() const override;
codeview::GUID getGuid() const override;
int32_t getOffset() const override;
int32_t getThisAdjust() const override;
int32_t getVirtualBasePointerOffset() const override;

View File

@ -1,46 +0,0 @@
//===- PDBTypeServerHandler.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H
#define LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <memory>
#include <string>
namespace llvm {
namespace pdb {
class NativeSession;
class PDBTypeServerHandler : public codeview::TypeServerHandler {
public:
PDBTypeServerHandler(bool RevisitAlways = false);
void addSearchPath(StringRef Path);
Expected<bool> handle(codeview::TypeServer2Record &TS,
codeview::TypeVisitorCallbacks &Callbacks) override;
private:
Expected<bool> handleInternal(PDBFile &File,
codeview::TypeVisitorCallbacks &Callbacks);
bool RevisitAlways;
std::unique_ptr<NativeSession> Session;
StringSet<> SearchPaths;
};
}
}
#endif

View File

@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H
#define LLVM_DEBUGINFO_PDB_RAW_RAWTYPES_H
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Endian.h"
@ -268,17 +269,6 @@ struct PublicsStreamHeader {
support::ulittle32_t NumSections;
};
/// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but
/// is abstracted here for the purposes of non-Windows platforms that don't have
/// the GUID structure defined.
struct PDB_UniqueId {
uint8_t Guid[16];
};
inline bool operator==(const PDB_UniqueId &LHS, const PDB_UniqueId &RHS) {
return 0 == ::memcmp(LHS.Guid, RHS.Guid, sizeof(LHS.Guid));
}
// The header preceding the global TPI stream.
// This corresponds to `HDR` in PDB/dbi/tpi.h.
struct TpiStreamHeader {
@ -312,7 +302,7 @@ struct InfoStreamHeader {
support::ulittle32_t Version;
support::ulittle32_t Signature;
support::ulittle32_t Age;
PDB_UniqueId Guid;
codeview::GUID Guid;
};
/// The header preceding the /names stream.

View File

@ -10,84 +10,13 @@
#ifndef LLVM_DEBUGINFO_PDB_TPIHASHING_H
#define LLVM_DEBUGINFO_PDB_TPIHASHING_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <string>
namespace llvm {
namespace pdb {
class TpiHashUpdater : public codeview::TypeVisitorCallbacks {
public:
TpiHashUpdater() = default;
#define TYPE_RECORD(EnumName, EnumVal, Name) \
virtual Error visitKnownRecord(codeview::CVType &CVR, \
codeview::Name##Record &Record) override { \
visitKnownRecordImpl(CVR, Record); \
return Error::success(); \
}
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#define MEMBER_RECORD(EnumName, EnumVal, Name)
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
private:
template <typename RecordKind>
void visitKnownRecordImpl(codeview::CVType &CVR, RecordKind &Record) {
CVR.Hash = 0;
}
void visitKnownRecordImpl(codeview::CVType &CVR,
codeview::UdtSourceLineRecord &Rec);
void visitKnownRecordImpl(codeview::CVType &CVR,
codeview::UdtModSourceLineRecord &Rec);
void visitKnownRecordImpl(codeview::CVType &CVR, codeview::ClassRecord &Rec);
void visitKnownRecordImpl(codeview::CVType &CVR, codeview::EnumRecord &Rec);
void visitKnownRecordImpl(codeview::CVType &CVR, codeview::UnionRecord &Rec);
};
class TpiHashVerifier : public codeview::TypeVisitorCallbacks {
public:
TpiHashVerifier(FixedStreamArray<support::ulittle32_t> &HashValues,
uint32_t NumHashBuckets)
: HashValues(HashValues), NumHashBuckets(NumHashBuckets) {}
Error visitKnownRecord(codeview::CVType &CVR,
codeview::UdtSourceLineRecord &Rec) override;
Error visitKnownRecord(codeview::CVType &CVR,
codeview::UdtModSourceLineRecord &Rec) override;
Error visitKnownRecord(codeview::CVType &CVR,
codeview::ClassRecord &Rec) override;
Error visitKnownRecord(codeview::CVType &CVR,
codeview::EnumRecord &Rec) override;
Error visitKnownRecord(codeview::CVType &CVR,
codeview::UnionRecord &Rec) override;
Error visitTypeBegin(codeview::CVType &CVR) override;
private:
Error verifySourceLine(codeview::TypeIndex TI);
Error errorInvalidHash() {
return make_error<RawError>(
raw_error_code::invalid_tpi_hash,
"Type index is 0x" +
utohexstr(codeview::TypeIndex::FirstNonSimpleIndex + Index));
}
FixedStreamArray<support::ulittle32_t> HashValues;
codeview::CVType RawRecord;
uint32_t NumHashBuckets;
uint32_t Index = -1;
};
Expected<uint32_t> hashTypeRecord(const llvm::codeview::CVType &Type);
} // end namespace pdb
} // end namespace llvm

View File

@ -32,7 +32,6 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum);
raw_ostream &operator<<(raw_ostream &OS, const PDB_Lang &Lang);
raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag);
raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access);
raw_ostream &operator<<(raw_ostream &OS, const PDB_UniqueId &Guid);
raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);

View File

@ -22,6 +22,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
@ -289,21 +290,21 @@ class CompileOnDemandLayer {
// FIXME: We should track and free associated resources (unused compile
// callbacks, uncompiled IR, and no-longer-needed/reachable function
// implementations).
// FIXME: Return Error once the JIT APIs are Errorized.
bool updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) {
Error updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) {
    // Find out which logical dylib contains our symbol.
auto LDI = LogicalDylibs.begin();
for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) {
if (auto LMResources = LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) {
if (auto LMResources =
LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) {
Module &SrcM = LMResources->SourceModule->getResource();
std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout());
if (auto EC = LMResources->StubsMgr->updatePointer(CalledFnName, FnBodyAddr))
return false;
else
return true;
if (auto Err = LMResources->StubsMgr->updatePointer(CalledFnName,
FnBodyAddr))
return Err;
return Error::success();
}
}
return false;
return make_error<JITSymbolNotFound>(FuncName);
}
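An illustrative (hypothetical) call site for the Errorized interface; CODLayer, Name, and Addr are assumptions, not names from the patch:

  // Sketch: failure is now reported as an llvm::Error rather than a bool.
  if (llvm::Error Err = CODLayer.updatePointer(Name, Addr))
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(),
                                "updatePointer failed: ");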
private:
@ -363,11 +364,8 @@ class CompileOnDemandLayer {
});
}
auto EC = LD.StubsMgr->createStubs(StubInits);
(void)EC;
// FIXME: This should be propagated back to the user. Stub creation may
// fail for remote JITs.
assert(!EC && "Error generating stubs");
if (auto Err = LD.StubsMgr->createStubs(StubInits))
return Err;
}
// If this module doesn't contain any globals, aliases, or module flags then

View File

@ -135,12 +135,13 @@ class RTDyldMemoryManager : public MCJITMemoryManager,
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
private:
protected:
struct EHFrame {
uint8_t *Addr;
size_t Size;
};
std::vector<EHFrame> EHFrames;
typedef std::vector<EHFrame> EHFrameInfos;
EHFrameInfos EHFrames;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).

View File

@ -143,11 +143,15 @@ namespace CallingConv {
/// System V ABI, used on most non-Windows systems.
X86_64_SysV = 78,
/// \brief The C convention as implemented on Windows/x86-64. This
/// convention differs from the more common \c X86_64_SysV convention
/// in a number of ways, most notably in that XMM registers used to pass
/// arguments are shadowed by GPRs, and vice versa.
X86_64_Win64 = 79,
/// \brief The C convention as implemented on Windows/x86-64 and
/// AArch64. This convention differs from the more common
/// \c X86_64_SysV convention in a number of ways, most notably in
/// that XMM registers used to pass arguments are shadowed by GPRs,
/// and vice versa.
/// On AArch64, this is identical to the normal C (AAPCS) calling
/// convention for normal functions, but floats are passed in integer
/// registers to variadic functions.
Win64 = 79,
/// \brief MSVC calling convention that passes vectors and vector aggregates
/// in SSE registers.
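A minimal illustration of the renamed enumerator above; F is assumed to be an llvm::Function*, and existing users of X86_64_Win64 would switch to this spelling:

  // Sketch: tag a function with the Windows x64 / AArch64 variadic convention.
  F->setCallingConv(llvm::CallingConv::Win64);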

View File

@ -598,6 +598,10 @@ class ConstantDataSequential : public ConstantData {
/// specified element in the low bits of a uint64_t.
uint64_t getElementAsInteger(unsigned i) const;
/// If this is a sequential container of integers (of any size), return the
/// specified element as an APInt.
APInt getElementAsAPInt(unsigned i) const;
/// If this is a sequential container of floating point type, return the
/// specified element as an APFloat.
APFloat getElementAsAPFloat(unsigned i) const;
@ -761,6 +765,10 @@ class ConstantDataVector final : public ConstantDataSequential {
/// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
static Constant *getSplat(unsigned NumElts, Constant *Elt);
/// Returns true if this is a splat constant, meaning that all elements have
/// the same value.
bool isSplat() const;
/// If this is a splat constant, meaning that all of the elements have the
/// same value, return that value. Otherwise return NULL.
Constant *getSplatValue() const;
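A hedged sketch of the two new accessors; CDV is assumed to be a ConstantDataVector*:

  llvm::APInt First = CDV->getElementAsAPInt(0); // element 0 at its native width
  if (CDV->isSplat()) {
    // All lanes hold the same value; reuse the existing splat accessor.
    llvm::Constant *Splat = CDV->getSplatValue();
    (void)Splat;
  }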

View File

@ -674,32 +674,37 @@ namespace llvm {
/// Create a descriptor for an imported module.
/// \param Context The scope this module is imported into
/// \param NS The namespace being imported here
/// \param Line Line number
/// \param NS The namespace being imported here.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS,
unsigned Line);
DIFile *File, unsigned Line);
/// Create a descriptor for an imported module.
/// \param Context The scope this module is imported into
/// \param NS An aliased namespace
/// \param Line Line number
/// \param Context The scope this module is imported into.
/// \param NS An aliased namespace.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
DIImportedEntity *createImportedModule(DIScope *Context,
DIImportedEntity *NS, unsigned Line);
DIImportedEntity *NS, DIFile *File,
unsigned Line);
/// Create a descriptor for an imported module.
/// \param Context The scope this module is imported into
/// \param M The module being imported here
/// \param Line Line number
/// \param Context The scope this module is imported into.
/// \param M The module being imported here
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M,
unsigned Line);
DIFile *File, unsigned Line);
/// Create a descriptor for an imported function.
/// \param Context The scope this module is imported into
/// \param Decl The declaration (or definition) of a function, type, or
/// variable
/// \param Line Line number
/// \param Context The scope this module is imported into.
/// \param Decl The declaration (or definition) of a function, type, or
/// variable.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl,
unsigned Line,
DIFile *File, unsigned Line,
StringRef Name = "");
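A hedged sketch of the widened signature; DIB, Scope, NS, and File are assumed to be previously created debug-info nodes:

  // Sketch: the file of the using-declaration is now passed explicitly.
  llvm::DIImportedEntity *IE =
      DIB.createImportedModule(Scope, NS, File, /*Line=*/42);
  (void)IE;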
/// Insert a new llvm.dbg.declare intrinsic call.

View File

@ -435,10 +435,10 @@ class DIScope : public DINode {
/// Return the raw underlying file.
///
/// A \a DIFile is a \a DIScope, but it doesn't point at a separate file
/// (it\em is the file). If \c this is an \a DIFile, we need to return \c
/// this. Otherwise, return the first operand, which is where all other
/// subclasses store their file pointer.
/// A \a DIFile is a \a DIScope, but it doesn't point at a separate file (it
/// \em is the file). If \c this is an \a DIFile, we need to return \c this.
/// Otherwise, return the first operand, which is where all other subclasses
/// store their file pointer.
Metadata *getRawFile() const {
return isa<DIFile>(this) ? const_cast<DIScope *>(this)
: static_cast<Metadata *>(getOperand(0));
@ -2551,32 +2551,32 @@ class DIImportedEntity : public DINode {
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
DIScope *Scope, DINodeRef Entity,
unsigned Line, StringRef Name,
DIFile *File, unsigned Line, StringRef Name,
StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, Scope, Entity, Line,
return getImpl(Context, Tag, Scope, Entity, File, Line,
getCanonicalMDString(Context, Name), Storage, ShouldCreate);
}
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
Metadata *Scope, Metadata *Entity,
unsigned Line, MDString *Name,
StorageType Storage,
Metadata *File, unsigned Line,
MDString *Name, StorageType Storage,
bool ShouldCreate = true);
TempDIImportedEntity cloneImpl() const {
return getTemporary(getContext(), getTag(), getScope(), getEntity(),
getLine(), getName());
getFile(), getLine(), getName());
}
public:
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, DIScope *Scope, DINodeRef Entity,
unsigned Line, StringRef Name = ""),
(Tag, Scope, Entity, Line, Name))
DIFile *File, unsigned Line, StringRef Name = ""),
(Tag, Scope, Entity, File, Line, Name))
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, Metadata *Scope, Metadata *Entity,
unsigned Line, MDString *Name),
(Tag, Scope, Entity, Line, Name))
Metadata *File, unsigned Line, MDString *Name),
(Tag, Scope, Entity, File, Line, Name))
TempDIImportedEntity clone() const { return cloneImpl(); }
@ -2584,10 +2584,12 @@ class DIImportedEntity : public DINode {
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
DINodeRef getEntity() const { return DINodeRef(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawEntity() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
Metadata *getRawFile() const { return getOperand(3); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIImportedEntityKind;

View File

@ -34,22 +34,31 @@ class Module;
class raw_ostream;
extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
extern template class DominatorTreeBase<BasicBlock, true>; // PostDomTree
namespace DomTreeBuilder {
extern template void Calculate<Function, BasicBlock *>(
DominatorTreeBaseByGraphTraits<GraphTraits<BasicBlock *>> &DT, Function &F);
using BBDomTree = DomTreeBase<BasicBlock>;
using BBPostDomTree = PostDomTreeBase<BasicBlock>;
extern template void Calculate<Function, Inverse<BasicBlock *>>(
DominatorTreeBaseByGraphTraits<GraphTraits<Inverse<BasicBlock *>>> &DT,
Function &F);
extern template void Calculate<BBDomTree, Function>(BBDomTree &DT, Function &F);
extern template void Calculate<BBPostDomTree, Function>(BBPostDomTree &DT,
Function &F);
extern template bool Verify<BasicBlock *>(
const DominatorTreeBaseByGraphTraits<GraphTraits<BasicBlock *>> &DT);
extern template void InsertEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
BasicBlock *To);
extern template void InsertEdge<BBPostDomTree>(BBPostDomTree &DT,
BasicBlock *From,
BasicBlock *To);
extern template bool Verify<Inverse<BasicBlock *>>(
const DominatorTreeBaseByGraphTraits<GraphTraits<Inverse<BasicBlock *>>>
&DT);
extern template void DeleteEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
BasicBlock *To);
extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT,
BasicBlock *From,
BasicBlock *To);
extern template bool Verify<BBDomTree>(const BBDomTree &DT);
extern template bool Verify<BBPostDomTree>(const BBPostDomTree &DT);
} // namespace DomTreeBuilder
using DomTreeNode = DomTreeNodeBase<BasicBlock>;
@ -122,14 +131,12 @@ template <> struct DenseMapInfo<BasicBlockEdge> {
/// the dominator tree is initially constructed may still exist in the tree,
/// even if the tree is properly updated. Calling code should not rely on the
/// preceding statements; this is stated only to assist human understanding.
class DominatorTree : public DominatorTreeBase<BasicBlock> {
public:
using Base = DominatorTreeBase<BasicBlock>;
class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
public:
using Base = DominatorTreeBase<BasicBlock, false>;
DominatorTree() : DominatorTreeBase<BasicBlock>(false) {}
explicit DominatorTree(Function &F) : DominatorTreeBase<BasicBlock>(false) {
recalculate(F);
}
DominatorTree() = default;
explicit DominatorTree(Function &F) { recalculate(F); }
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,

View File

@ -32,16 +32,6 @@ class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem]>;
//
// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) ->
// Hexagon_void_si_Intrinsic<string GCCIntSuffix>
//
class Hexagon_void_si_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[], [llvm_ptr_ty],
[]>;
//
// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
@ -4959,11 +4949,25 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">;
//
def int_hexagon_S2_deinterleave :
Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">;
//
// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1)
//
def int_hexagon_prefetch :
Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">;
Hexagon_Intrinsic<"HEXAGON_prefetch", [], [llvm_ptr_ty], []>;
def int_hexagon_Y2_dccleana :
Hexagon_Intrinsic<"HEXAGON_Y2_dccleana", [], [llvm_ptr_ty], []>;
def int_hexagon_Y2_dccleaninva :
Hexagon_Intrinsic<"HEXAGON_Y2_dccleaninva", [], [llvm_ptr_ty], []>;
def int_hexagon_Y2_dcinva :
Hexagon_Intrinsic<"HEXAGON_Y2_dcinva", [], [llvm_ptr_ty], []>;
def int_hexagon_Y2_dczeroa :
Hexagon_Intrinsic<"HEXAGON_Y2_dczeroa", [], [llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly, IntrHasSideEffects]>;
def int_hexagon_Y4_l2fetch :
Hexagon_Intrinsic<"HEXAGON_Y4_l2fetch", [], [llvm_ptr_ty, llvm_i32_ty], []>;
def int_hexagon_Y5_l2fetch :
Hexagon_Intrinsic<"HEXAGON_Y5_l2fetch", [], [llvm_ptr_ty, llvm_i64_ty], []>;
def llvm_ptr32_ty : LLVMPointerType<llvm_i32_ty>;
def llvm_ptr64_ty : LLVMPointerType<llvm_i64_ty>;

View File

@ -373,6 +373,49 @@ let TargetPrefix = "s390" in {
def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Instructions from the Vector Enhancements Facility 1
def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty,
llvm_v16i8_ty>;
def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_s390_vfcesbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
def int_s390_vfchsbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
def int_s390_vfchesbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
def int_s390_vftcisb : SystemZBinaryConvIntCC<llvm_v4i32_ty, llvm_v4f32_ty>;
def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Instructions from the Vector Packed Decimal Facility
def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
// In fact write-only but there's no property
// for that.
[IntrArgMemOnly]>;
}
//===----------------------------------------------------------------------===//

View File

@ -75,6 +75,9 @@ namespace llvm {
static LaneBitmask getNone() { return LaneBitmask(0); }
static LaneBitmask getAll() { return ~LaneBitmask(0); }
static LaneBitmask getLane(unsigned Lane) {
return LaneBitmask(Type(1) << Lane);
}
private:
Type Mask = 0;

View File

@ -69,7 +69,7 @@ class MCFixup {
/// an instruction or an assembler directive.
const MCExpr *Value;
/// The byte index of start of the relocation inside the encoded instruction.
/// The byte index of start of the relocation inside the MCFragment.
uint32_t Offset;
/// The target dependent kind of fixup item this is. The kind is used to

View File

@ -209,6 +209,15 @@ class MCInstrDesc {
/// well.
unsigned getNumOperands() const { return NumOperands; }
using const_opInfo_iterator = const MCOperandInfo *;
const_opInfo_iterator opInfo_begin() const { return OpInfo; }
const_opInfo_iterator opInfo_end() const { return OpInfo + NumOperands; }
iterator_range<const_opInfo_iterator> operands() const {
return make_range(opInfo_begin(), opInfo_end());
}
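A minimal sketch of the new operands() range; Desc is assumed to be a const MCInstrDesc&:

  unsigned NumImmOperands = 0;
  for (const llvm::MCOperandInfo &OpInfo : Desc.operands())
    if (OpInfo.OperandType == llvm::MCOI::OPERAND_IMMEDIATE)
      ++NumImmOperands;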
/// \brief Return the number of MachineOperands that are register
/// definitions. Register definitions always occur at the start of the
/// machine operand list. This is the number of "outs" in the .td file,

View File

@ -95,7 +95,7 @@ struct COFFShortExport {
}
};
std::error_code writeImportLibrary(StringRef DLLName,
std::error_code writeImportLibrary(StringRef ImportName,
StringRef Path,
ArrayRef<COFFShortExport> Exports,
COFF::MachineTypes Machine);

View File

@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H
#define LLVM_OBJECT_COFF_MODULE_DEFINITION_H
@ -29,6 +28,7 @@ namespace object {
struct COFFModuleDefinition {
std::vector<COFFShortExport> Exports;
std::string OutputFile;
std::string ImportName;
uint64_t ImageBase = 0;
uint64_t StackReserve = 0;
uint64_t StackCommit = 0;
@ -40,8 +40,12 @@ struct COFFModuleDefinition {
uint32_t MinorOSVersion = 0;
};
// mingw and wine def files do not mangle _ for x86, which
// is a consequence of legacy binutils' dlltool functionality.
// This MingwDef flag should be removed once mingw stops this practice.
Expected<COFFModuleDefinition>
parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine);
parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine,
bool MingwDef = false);
} // End namespace object.
} // End namespace llvm.

View File

@ -60,6 +60,8 @@ ArrayRef<uint8_t> toDebugT(ArrayRef<LeafRecord>, BumpPtrAllocator &Alloc);
} // end namespace llvm
LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, true)
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord)
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord)

View File

@ -43,8 +43,9 @@ AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME

View File

@ -62,32 +62,45 @@ class BinaryItemStream : public BinaryStream {
return Error::success();
}
void setItems(ArrayRef<T> ItemArray) { Items = ItemArray; }
void setItems(ArrayRef<T> ItemArray) {
Items = ItemArray;
computeItemOffsets();
}
uint32_t getLength() override {
uint32_t Size = 0;
for (const auto &Item : Items)
Size += Traits::length(Item);
return Size;
return ItemEndOffsets.empty() ? 0 : ItemEndOffsets.back();
}
private:
Expected<uint32_t> translateOffsetIndex(uint32_t Offset) const {
void computeItemOffsets() {
ItemEndOffsets.clear();
ItemEndOffsets.reserve(Items.size());
uint32_t CurrentOffset = 0;
uint32_t CurrentIndex = 0;
for (const auto &Item : Items) {
if (CurrentOffset >= Offset)
break;
CurrentOffset += Traits::length(Item);
++CurrentIndex;
uint32_t Len = Traits::length(Item);
assert(Len > 0 && "no empty items");
CurrentOffset += Len;
ItemEndOffsets.push_back(CurrentOffset);
}
if (CurrentOffset != Offset)
}
Expected<uint32_t> translateOffsetIndex(uint32_t Offset) {
// Make sure the offset is somewhere in our items array.
if (Offset >= getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
return CurrentIndex;
++Offset;
auto Iter =
std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(), Offset);
size_t Idx = std::distance(ItemEndOffsets.begin(), Iter);
assert(Idx < Items.size() && "binary search for offset failed");
return Idx;
}
llvm::support::endianness Endian;
ArrayRef<T> Items;
// Sorted vector of offsets to accelerate lookup.
std::vector<uint32_t> ItemEndOffsets;
};
} // end namespace llvm
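A standalone sketch (independent of BinaryItemStream) of the same end-offset lookup technique: lower_bound over cumulative end offsets maps a byte offset back to its item index in O(log n):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Given cumulative end offsets of the items, the item containing Offset is
  // the first one whose end offset is strictly greater than Offset.
  static size_t offsetToIndex(const std::vector<uint32_t> &EndOffsets,
                              uint32_t Offset) {
    auto It = std::lower_bound(EndOffsets.begin(), EndOffsets.end(), Offset + 1);
    assert(It != EndOffsets.end() && "offset past the end of the stream");
    return static_cast<size_t>(It - EndOffsets.begin());
  }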

View File

@ -125,30 +125,39 @@ inline format_object<Ts...> format(const char *Fmt, const Ts &... Vals) {
return format_object<Ts...>(Fmt, Vals...);
}
/// This is a helper class used for left_justify() and right_justify().
/// This is a helper class for left_justify, right_justify, and center_justify.
class FormattedString {
public:
enum Justification { JustifyNone, JustifyLeft, JustifyRight, JustifyCenter };
FormattedString(StringRef S, unsigned W, Justification J)
: Str(S), Width(W), Justify(J) {}
private:
StringRef Str;
unsigned Width;
bool RightJustify;
Justification Justify;
friend class raw_ostream;
public:
FormattedString(StringRef S, unsigned W, bool R)
: Str(S), Width(W), RightJustify(R) { }
};
/// left_justify - append spaces after string so total output is
/// \p Width characters. If \p Str is larger than \p Width, the full string
/// is written with no padding.
inline FormattedString left_justify(StringRef Str, unsigned Width) {
return FormattedString(Str, Width, false);
return FormattedString(Str, Width, FormattedString::JustifyLeft);
}
/// right_justify - add spaces before string so total output is
/// \p Width characters. If \p Str is larger than \p Width, the full string
/// is written with no padding.
inline FormattedString right_justify(StringRef Str, unsigned Width) {
return FormattedString(Str, Width, true);
return FormattedString(Str, Width, FormattedString::JustifyRight);
}
/// center_justify - add spaces before and after string so total output is
/// \p Width characters. If \p Str is larger than \p Width, the full string
/// is written with no padding.
inline FormattedString center_justify(StringRef Str, unsigned Width) {
return FormattedString(Str, Width, FormattedString::JustifyCenter);
}
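A small usage sketch of the three helpers, including the newly added center_justify:

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  // Pads each field to a fixed column width when streamed to a raw_ostream.
  static void printHeader() {
    llvm::outs() << llvm::left_justify("name", 20)
                 << llvm::right_justify("size", 10) << '\n'
                 << llvm::center_justify("== totals ==", 30) << '\n';
  }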
/// This is a helper class used for format_hex() and format_decimal().

View File

@ -41,27 +41,21 @@
namespace llvm {
template <class NodeT> class DominatorTreeBase;
template <typename NodeT, bool IsPostDom>
class DominatorTreeBase;
namespace detail {
template <typename GT> struct DominatorTreeBaseTraits {
static_assert(std::is_pointer<typename GT::NodeRef>::value,
"Currently NodeRef must be a pointer type.");
using type = DominatorTreeBase<
typename std::remove_pointer<typename GT::NodeRef>::type>;
};
} // end namespace detail
template <typename GT>
using DominatorTreeBaseByGraphTraits =
typename detail::DominatorTreeBaseTraits<GT>::type;
namespace DomTreeBuilder {
template <class DomTreeT>
struct SemiNCAInfo;
} // namespace DomTreeBuilder
/// \brief Base class for the actual dominator tree node.
template <class NodeT> class DomTreeNodeBase {
friend struct PostDominatorTree;
template <class N> friend class DominatorTreeBase;
friend class DominatorTreeBase<NodeT, false>;
friend class DominatorTreeBase<NodeT, true>;
friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase<NodeT, false>>;
friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase<NodeT, true>>;
NodeT *TheBB;
DomTreeNodeBase *IDom;
@ -192,58 +186,69 @@ void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &O,
}
namespace DomTreeBuilder {
template <class NodeT>
struct SemiNCAInfo;
// The routines below are provided in a separate header but referenced here.
template <typename DomTreeT, typename FuncT>
void Calculate(DomTreeT &DT, FuncT &F);
// The calculate routine is provided in a separate header but referenced here.
template <class FuncT, class N>
void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT, FuncT &F);
template <class DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
// The verify function is provided in a separate header but referenced here.
template <class N>
bool Verify(const DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT);
template <class DomTreeT>
void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
template <typename DomTreeT>
bool Verify(const DomTreeT &DT);
} // namespace DomTreeBuilder
/// \brief Core dominator tree base class.
///
/// This class is a generic template over graph nodes. It is instantiated for
/// various graphs in the LLVM IR or in the code generator.
template <class NodeT> class DominatorTreeBase {
template <typename NodeT, bool IsPostDom>
class DominatorTreeBase {
protected:
std::vector<NodeT *> Roots;
bool IsPostDominators;
using DomTreeNodeMapType =
DenseMap<NodeT *, std::unique_ptr<DomTreeNodeBase<NodeT>>>;
DomTreeNodeMapType DomTreeNodes;
DomTreeNodeBase<NodeT> *RootNode;
using ParentPtr = decltype(std::declval<NodeT *>()->getParent());
ParentPtr Parent = nullptr;
mutable bool DFSInfoValid = false;
mutable unsigned int SlowQueries = 0;
friend struct DomTreeBuilder::SemiNCAInfo<NodeT>;
using SNCAInfoTy = DomTreeBuilder::SemiNCAInfo<NodeT>;
friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase>;
public:
explicit DominatorTreeBase(bool isPostDom) : IsPostDominators(isPostDom) {}
static_assert(std::is_pointer<typename GraphTraits<NodeT *>::NodeRef>::value,
"Currently DominatorTreeBase supports only pointer nodes");
using NodeType = NodeT;
using NodePtr = NodeT *;
static constexpr bool IsPostDominator = IsPostDom;
DominatorTreeBase() {}
DominatorTreeBase(DominatorTreeBase &&Arg)
: Roots(std::move(Arg.Roots)),
IsPostDominators(Arg.IsPostDominators),
DomTreeNodes(std::move(Arg.DomTreeNodes)),
RootNode(std::move(Arg.RootNode)),
DFSInfoValid(std::move(Arg.DFSInfoValid)),
SlowQueries(std::move(Arg.SlowQueries)) {
RootNode(Arg.RootNode),
Parent(Arg.Parent),
DFSInfoValid(Arg.DFSInfoValid),
SlowQueries(Arg.SlowQueries) {
Arg.wipe();
}
DominatorTreeBase &operator=(DominatorTreeBase &&RHS) {
Roots = std::move(RHS.Roots);
IsPostDominators = RHS.IsPostDominators;
DomTreeNodes = std::move(RHS.DomTreeNodes);
RootNode = std::move(RHS.RootNode);
DFSInfoValid = std::move(RHS.DFSInfoValid);
SlowQueries = std::move(RHS.SlowQueries);
RootNode = RHS.RootNode;
Parent = RHS.Parent;
DFSInfoValid = RHS.DFSInfoValid;
SlowQueries = RHS.SlowQueries;
RHS.wipe();
return *this;
}
@ -259,11 +264,12 @@ template <class NodeT> class DominatorTreeBase {
/// isPostDominator - Returns true if analysis based of postdoms
///
bool isPostDominator() const { return IsPostDominators; }
bool isPostDominator() const { return IsPostDominator; }
/// compare - Return false if the other dominator tree base matches this
/// dominator tree base. Otherwise return true.
bool compare(const DominatorTreeBase &Other) const {
if (Parent != Other.Parent) return true;
const DomTreeNodeMapType &OtherDomTreeNodes = Other.DomTreeNodes;
if (DomTreeNodes.size() != OtherDomTreeNodes.size())
@ -443,10 +449,50 @@ template <class NodeT> class DominatorTreeBase {
const_cast<NodeT *>(B));
}
bool isVirtualRoot(const DomTreeNodeBase<NodeT> *A) const {
return isPostDominator() && !A->getBlock();
}
//===--------------------------------------------------------------------===//
// API to update (Post)DominatorTree information based on modifications to
// the CFG...
/// Inform the dominator tree about a CFG edge insertion and update the tree.
///
/// This function has to be called just before or just after making the update
/// on the actual CFG. There cannot be any other updates that the dominator
/// tree doesn't know about.
///
/// Note that for postdominators it automatically takes care of inserting
/// a reverse edge internally (so there's no need to swap the parameters).
///
void insertEdge(NodeT *From, NodeT *To) {
assert(From);
assert(To);
assert(From->getParent() == Parent);
assert(To->getParent() == Parent);
DomTreeBuilder::InsertEdge(*this, From, To);
}
/// Inform the dominator tree about a CFG edge deletion and update the tree.
///
/// This function has to be called just after making the update
/// on the actual CFG. There cannot be any other updates that the dominator
/// tree doesn't know about. The only exception is when the deletion that the
  /// tree is informed about makes some (dominator) subtree unreachable -- in
/// this case, it is fine to perform deletions within this subtree.
///
/// Note that for postdominators it automatically takes care of deleting
/// a reverse edge internally (so there's no need to swap the parameters).
///
void deleteEdge(NodeT *From, NodeT *To) {
assert(From);
assert(To);
assert(From->getParent() == Parent);
assert(To->getParent() == Parent);
DomTreeBuilder::DeleteEdge(*this, From, To);
}
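An illustrative sketch (not part of the patch) of the intended call pattern; DT, From, and To are assumptions, with From and To belonging to the same parent the tree was built over:

  // Sketch: notify the tree immediately after splicing the new CFG edge in.
  DT.insertEdge(From, To);
  // ... and symmetrically, right after the edge is removed from the CFG again:
  DT.deleteEdge(From, To);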
/// Add a new node to the dominator tree information.
///
/// This creates a new node as a child of DomBB dominator node, linking it
@ -530,7 +576,7 @@ template <class NodeT> class DominatorTreeBase {
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
void splitBlock(NodeT *NewBB) {
if (this->IsPostDominators)
if (IsPostDominator)
Split<Inverse<NodeT *>>(NewBB);
else
Split<NodeT *>(NewBB);
@ -607,37 +653,33 @@ template <class NodeT> class DominatorTreeBase {
template <class FT> void recalculate(FT &F) {
using TraitsTy = GraphTraits<FT *>;
reset();
Parent = &F;
if (!this->IsPostDominators) {
if (!IsPostDominator) {
// Initialize root
NodeT *entry = TraitsTy::getEntryNode(&F);
addRoot(entry);
DomTreeBuilder::Calculate<FT, NodeT *>(*this, F);
} else {
// Initialize the roots list
for (auto *Node : nodes(&F))
if (TraitsTy::child_begin(Node) == TraitsTy::child_end(Node))
addRoot(Node);
DomTreeBuilder::Calculate<FT, Inverse<NodeT *>>(*this, F);
}
DomTreeBuilder::Calculate(*this, F);
}
/// verify - check parent and sibling property
bool verify() const {
return this->isPostDominator()
? DomTreeBuilder::Verify<Inverse<NodeT *>>(*this)
: DomTreeBuilder::Verify<NodeT *>(*this);
}
bool verify() const { return DomTreeBuilder::Verify(*this); }
protected:
void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
void reset() {
DomTreeNodes.clear();
this->Roots.clear();
Roots.clear();
RootNode = nullptr;
Parent = nullptr;
DFSInfoValid = false;
SlowQueries = 0;
}
@ -719,13 +761,21 @@ template <class NodeT> class DominatorTreeBase {
void wipe() {
DomTreeNodes.clear();
RootNode = nullptr;
Parent = nullptr;
}
};
template <typename T>
using DomTreeBase = DominatorTreeBase<T, false>;
template <typename T>
using PostDomTreeBase = DominatorTreeBase<T, true>;
// These two functions are declared out of line as a workaround for building
// with old (< r147295) versions of clang because of pr11642.
template <class NodeT>
bool DominatorTreeBase<NodeT>::dominates(const NodeT *A, const NodeT *B) const {
template <typename NodeT, bool IsPostDom>
bool DominatorTreeBase<NodeT, IsPostDom>::dominates(const NodeT *A,
const NodeT *B) const {
if (A == B)
return true;
@ -735,9 +785,9 @@ bool DominatorTreeBase<NodeT>::dominates(const NodeT *A, const NodeT *B) const {
return dominates(getNode(const_cast<NodeT *>(A)),
getNode(const_cast<NodeT *>(B)));
}
template <class NodeT>
bool DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A,
const NodeT *B) const {
template <typename NodeT, bool IsPostDom>
bool DominatorTreeBase<NodeT, IsPostDom>::properlyDominates(
const NodeT *A, const NodeT *B) const {
if (A == B)
return false;

View File

@ -20,15 +20,28 @@
/// out that the theoretically slower O(n*log(n)) implementation is actually
/// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
///
/// The file uses the Depth Based Search algorithm to perform incremental
/// updates (insertions and deletions). The implemented algorithm is based on this
/// publication:
///
/// An Experimental Study of Dynamic Dominators
/// Loukas Georgiadis, et al., April 12 2016, pp. 5-7, 9-10:
/// https://arxiv.org/pdf/1604.02711.pdf
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
#define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
#include <queue>
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GenericDomTree.h"
#define DEBUG_TYPE "dom-tree-builder"
namespace llvm {
namespace DomTreeBuilder {
@ -46,13 +59,14 @@ struct ChildrenGetter<NodePtr, true> {
}
};
// Information record used by Semi-NCA during tree construction.
template <typename NodeT>
template <typename DomTreeT>
struct SemiNCAInfo {
using NodePtr = NodeT *;
using DomTreeT = DominatorTreeBase<NodeT>;
using NodePtr = typename DomTreeT::NodePtr;
using NodeT = typename DomTreeT::NodeType;
using TreeNodePtr = DomTreeNodeBase<NodeT> *;
static constexpr bool IsPostDom = DomTreeT::IsPostDominator;
// Information record used by Semi-NCA during tree construction.
struct InfoRec {
unsigned DFSNum = 0;
unsigned Parent = 0;
@ -62,11 +76,13 @@ struct SemiNCAInfo {
SmallVector<NodePtr, 2> ReverseChildren;
};
std::vector<NodePtr> NumToNode;
// Number to node mapping is 1-based. Initialize the mapping to start with
// a dummy element.
std::vector<NodePtr> NumToNode = {nullptr};
DenseMap<NodePtr, InfoRec> NodeToInfo;
void clear() {
NumToNode.clear();
NumToNode = {nullptr}; // Restore to initial state with a dummy start node.
NodeToInfo.clear();
}
@ -90,12 +106,28 @@ struct SemiNCAInfo {
// Add a new tree node for this NodeT, and link it as a child of
// IDomNode
return (DT.DomTreeNodes[BB] = IDomNode->addChild(
llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
.get();
}
static bool AlwaysDescend(NodePtr, NodePtr) { return true; }
struct BlockNamePrinter {
NodePtr N;
BlockNamePrinter(NodePtr Block) : N(Block) {}
BlockNamePrinter(TreeNodePtr TN) : N(TN ? TN->getBlock() : nullptr) {}
friend raw_ostream &operator<<(raw_ostream &O, const BlockNamePrinter &BP) {
if (!BP.N)
O << "nullptr";
else
BP.N->printAsOperand(O, false);
return O;
}
};
// Custom DFS implementation which can skip nodes based on a provided
// predicate. It also collects ReverseChildren so that we don't have to spend
// time getting predecessors in SemiNCA.
@ -177,44 +209,42 @@ struct SemiNCAInfo {
return VInInfo.Label;
}
template <typename NodeType>
void runSemiNCA(DomTreeT &DT, unsigned NumBlocks) {
// Step #1: Number blocks in depth-first order and initialize variables used
// in later stages of the algorithm.
const unsigned N = doFullDFSWalk(DT, AlwaysDescend);
// It might be that some blocks did not get a DFS number (e.g., blocks of
// infinite loops). In these cases an artificial exit node is required.
const bool MultipleRoots =
DT.Roots.size() > 1 || (DT.isPostDominator() && N != NumBlocks);
// This function requires DFS to be run before calling it.
void runSemiNCA(DomTreeT &DT, const unsigned MinLevel = 0) {
const unsigned NextDFSNum(NumToNode.size());
// Initialize IDoms to spanning tree parents.
for (unsigned i = 1; i <= N; ++i) {
for (unsigned i = 1; i < NextDFSNum; ++i) {
const NodePtr V = NumToNode[i];
auto &VInfo = NodeToInfo[V];
VInfo.IDom = NumToNode[VInfo.Parent];
}
// Step #2: Calculate the semidominators of all vertices.
for (unsigned i = N; i >= 2; --i) {
// Step #1: Calculate the semidominators of all vertices.
for (unsigned i = NextDFSNum - 1; i >= 2; --i) {
NodePtr W = NumToNode[i];
auto &WInfo = NodeToInfo[W];
// Initialize the semi dominator to point to the parent node.
WInfo.Semi = WInfo.Parent;
for (const auto &N : WInfo.ReverseChildren)
if (NodeToInfo.count(N)) { // Only if this predecessor is reachable!
unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
if (SemiU < WInfo.Semi)
WInfo.Semi = SemiU;
}
for (const auto &N : WInfo.ReverseChildren) {
if (NodeToInfo.count(N) == 0) // Skip unreachable predecessors.
continue;
const TreeNodePtr TN = DT.getNode(N);
// Skip predecessors whose level is above the subtree we are processing.
if (TN && TN->getLevel() < MinLevel)
continue;
unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
if (SemiU < WInfo.Semi) WInfo.Semi = SemiU;
}
}
// Step #3: Explicitly define the immediate dominator of each vertex.
// Step #2: Explicitly define the immediate dominator of each vertex.
// IDom[i] = NCA(SDom[i], SpanningTreeParent(i)).
// Note that the parents were stored in IDoms and later got invalidated
// during path compression in Eval.
for (unsigned i = 2; i <= N; ++i) {
for (unsigned i = 2; i < NextDFSNum; ++i) {
const NodePtr W = NumToNode[i];
auto &WInfo = NodeToInfo[W];
const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum;
@ -224,46 +254,11 @@ struct SemiNCAInfo {
WInfo.IDom = WIDomCandidate;
}
if (DT.Roots.empty()) return;
// Add a node for the root. This node might be the actual root, if there is
// one exit block, or it may be the virtual exit (denoted by
// (BasicBlock *)0) which postdominates all real exits if there are multiple
// exit blocks, or an infinite loop.
NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr;
DT.RootNode =
(DT.DomTreeNodes[Root] =
llvm::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
.get();
// Loop over all of the reachable blocks in the function...
for (unsigned i = 2; i <= N; ++i) {
NodePtr W = NumToNode[i];
// Don't replace this with 'count', the insertion side effect is important
if (DT.DomTreeNodes[W])
continue; // Haven't calculated this node yet?
NodePtr ImmDom = getIDom(W);
assert(ImmDom || DT.DomTreeNodes[nullptr]);
// Get or calculate the node for the immediate dominator
TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT);
// Add a new tree node for this BasicBlock, and link it as a child of
// IDomNode
DT.DomTreeNodes[W] = IDomNode->addChild(
llvm::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
}
}
template <typename DescendCondition>
unsigned doFullDFSWalk(const DomTreeT &DT, DescendCondition DC) {
unsigned Num = 0;
NumToNode.push_back(nullptr);
if (DT.Roots.size() > 1) {
auto &BBInfo = NodeToInfo[nullptr];
@ -283,11 +278,257 @@ struct SemiNCAInfo {
return Num;
}
static void PrintBlockOrNullptr(raw_ostream &O, NodePtr Obj) {
if (!Obj)
O << "nullptr";
void calculateFromScratch(DomTreeT &DT, const unsigned NumBlocks) {
// Step #0: Number blocks in depth-first order and initialize variables used
// in later stages of the algorithm.
const unsigned LastDFSNum = doFullDFSWalk(DT, AlwaysDescend);
runSemiNCA(DT);
if (DT.Roots.empty()) return;
// Add a node for the root. This node might be the actual root, if there is
// one exit block, or it may be the virtual exit (denoted by
// (BasicBlock *)0) which postdominates all real exits if there are multiple
// exit blocks, or an infinite loop.
// It might be that some blocks did not get a DFS number (e.g., blocks of
// infinite loops). In these cases an artificial exit node is required.
const bool MultipleRoots = DT.Roots.size() > 1 || (DT.isPostDominator() &&
LastDFSNum != NumBlocks);
NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr;
DT.RootNode = (DT.DomTreeNodes[Root] =
llvm::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
.get();
attachNewSubtree(DT, DT.RootNode);
}
void attachNewSubtree(DomTreeT& DT, const TreeNodePtr AttachTo) {
// Attach the first unreachable block to AttachTo.
NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock();
// Loop over all of the discovered blocks in the function...
for (size_t i = 1, e = NumToNode.size(); i != e; ++i) {
NodePtr W = NumToNode[i];
DEBUG(dbgs() << "\tdiscovered a new reachable node "
<< BlockNamePrinter(W) << "\n");
// Don't replace this with 'count', the insertion side effect is important
if (DT.DomTreeNodes[W]) continue; // Haven't calculated this node yet?
NodePtr ImmDom = getIDom(W);
// Get or calculate the node for the immediate dominator
TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT);
// Add a new tree node for this BasicBlock, and link it as a child of
// IDomNode
DT.DomTreeNodes[W] = IDomNode->addChild(
llvm::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
}
}
void reattachExistingSubtree(DomTreeT &DT, const TreeNodePtr AttachTo) {
NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock();
for (size_t i = 1, e = NumToNode.size(); i != e; ++i) {
const NodePtr N = NumToNode[i];
const TreeNodePtr TN = DT.getNode(N);
assert(TN);
const TreeNodePtr NewIDom = DT.getNode(NodeToInfo[N].IDom);
TN->setIDom(NewIDom);
}
}
// Helper struct used during edge insertions.
struct InsertionInfo {
using BucketElementTy = std::pair<unsigned, TreeNodePtr>;
struct DecreasingLevel {
bool operator()(const BucketElementTy &First,
const BucketElementTy &Second) const {
return First.first > Second.first;
}
};
std::priority_queue<BucketElementTy, SmallVector<BucketElementTy, 8>,
DecreasingLevel>
Bucket; // Queue of tree nodes sorted by level in descending order.
SmallDenseSet<TreeNodePtr, 8> Affected;
SmallDenseSet<TreeNodePtr, 8> Visited;
SmallVector<TreeNodePtr, 8> AffectedQueue;
SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
};
static void InsertEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) {
assert(From && To && "Cannot connect nullptrs");
DEBUG(dbgs() << "Inserting edge " << BlockNamePrinter(From) << " -> "
<< BlockNamePrinter(To) << "\n");
const TreeNodePtr FromTN = DT.getNode(From);
// Ignore edges from unreachable nodes.
if (!FromTN) return;
DT.DFSInfoValid = false;
const TreeNodePtr ToTN = DT.getNode(To);
if (!ToTN)
InsertUnreachable(DT, FromTN, To);
else
InsertReachable(DT, FromTN, ToTN);
}
// Handles insertion to a node already in the dominator tree.
static void InsertReachable(DomTreeT &DT, const TreeNodePtr From,
const TreeNodePtr To) {
DEBUG(dbgs() << "\tReachable " << BlockNamePrinter(From->getBlock())
<< " -> " << BlockNamePrinter(To->getBlock()) << "\n");
const NodePtr NCDBlock =
DT.findNearestCommonDominator(From->getBlock(), To->getBlock());
assert(NCDBlock || DT.isPostDominator());
const TreeNodePtr NCD = DT.getNode(NCDBlock);
assert(NCD);
DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n");
const TreeNodePtr ToIDom = To->getIDom();
// Nothing affected -- NCA property holds.
// (Based on the lemma 2.5 from the second paper.)
if (NCD == To || NCD == ToIDom) return;
// Identify and collect affected nodes.
InsertionInfo II;
DEBUG(dbgs() << "Marking " << BlockNamePrinter(To) << " as affected\n");
II.Affected.insert(To);
const unsigned ToLevel = To->getLevel();
DEBUG(dbgs() << "Putting " << BlockNamePrinter(To) << " into a Bucket\n");
II.Bucket.push({ToLevel, To});
while (!II.Bucket.empty()) {
const TreeNodePtr CurrentNode = II.Bucket.top().second;
II.Bucket.pop();
DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
<< BlockNamePrinter(CurrentNode) << "\n");
II.Visited.insert(CurrentNode);
II.AffectedQueue.push_back(CurrentNode);
// Discover and collect affected successors of the current node.
VisitInsertion(DT, CurrentNode, CurrentNode->getLevel(), NCD, II);
}
// Finish by updating immediate dominators and levels.
UpdateInsertion(DT, NCD, II);
}
// Visits an affected node and collects its affected successors.
static void VisitInsertion(DomTreeT &DT, const TreeNodePtr TN,
const unsigned RootLevel, const TreeNodePtr NCD,
InsertionInfo &II) {
const unsigned NCDLevel = NCD->getLevel();
DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n");
assert(TN->getBlock());
for (const NodePtr Succ :
ChildrenGetter<NodePtr, IsPostDom>::Get(TN->getBlock())) {
const TreeNodePtr SuccTN = DT.getNode(Succ);
assert(SuccTN && "Unreachable successor found at reachable insertion");
const unsigned SuccLevel = SuccTN->getLevel();
DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
<< ", level = " << SuccLevel << "\n");
// Succ dominated by subtree From -- not affected.
// (Based on the lemma 2.5 from the second paper.)
if (SuccLevel > RootLevel) {
DEBUG(dbgs() << "\t\tDominated by subtree From\n");
if (II.Visited.count(SuccTN) != 0) continue;
DEBUG(dbgs() << "\t\tMarking visited not affected "
<< BlockNamePrinter(Succ) << "\n");
II.Visited.insert(SuccTN);
II.VisitedNotAffectedQueue.push_back(SuccTN);
VisitInsertion(DT, SuccTN, RootLevel, NCD, II);
} else if ((SuccLevel > NCDLevel + 1) && II.Affected.count(SuccTN) == 0) {
DEBUG(dbgs() << "\t\tMarking affected and adding "
<< BlockNamePrinter(Succ) << " to a Bucket\n");
II.Affected.insert(SuccTN);
II.Bucket.push({SuccLevel, SuccTN});
}
}
}
// Updates immediate dominators and levels after insertion.
static void UpdateInsertion(DomTreeT &DT, const TreeNodePtr NCD,
InsertionInfo &II) {
DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n");
for (const TreeNodePtr TN : II.AffectedQueue) {
DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN)
<< ") = " << BlockNamePrinter(NCD) << "\n");
TN->setIDom(NCD);
}
UpdateLevelsAfterInsertion(II);
}
static void UpdateLevelsAfterInsertion(InsertionInfo &II) {
DEBUG(dbgs() << "Updating levels for visited but not affected nodes\n");
for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) {
DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = ("
<< BlockNamePrinter(TN->getIDom()) << ") "
<< TN->getIDom()->getLevel() << " + 1\n");
TN->UpdateLevel();
}
}
// Handles insertion to previously unreachable nodes.
static void InsertUnreachable(DomTreeT &DT, const TreeNodePtr From,
const NodePtr To) {
DEBUG(dbgs() << "Inserting " << BlockNamePrinter(From)
<< " -> (unreachable) " << BlockNamePrinter(To) << "\n");
// Collect discovered edges to already reachable nodes.
SmallVector<std::pair<NodePtr, TreeNodePtr>, 8> DiscoveredEdgesToReachable;
// Discover and connect nodes that became reachable with the insertion.
ComputeUnreachableDominators(DT, To, From, DiscoveredEdgesToReachable);
DEBUG(dbgs() << "Inserted " << BlockNamePrinter(From)
<< " -> (prev unreachable) " << BlockNamePrinter(To) << "\n");
DEBUG(DT.print(dbgs()));
// Use the discovered edges and insert the discovered connecting (incoming)
// edges.
for (const auto &Edge : DiscoveredEdgesToReachable) {
DEBUG(dbgs() << "\tInserting discovered connecting edge "
<< BlockNamePrinter(Edge.first) << " -> "
<< BlockNamePrinter(Edge.second) << "\n");
InsertReachable(DT, DT.getNode(Edge.first), Edge.second);
}
}
// Connects nodes that become reachable with an insertion.
static void ComputeUnreachableDominators(
DomTreeT &DT, const NodePtr Root, const TreeNodePtr Incoming,
SmallVectorImpl<std::pair<NodePtr, TreeNodePtr>>
&DiscoveredConnectingEdges) {
assert(!DT.getNode(Root) && "Root must not be reachable");
// Visit only previously unreachable nodes.
auto UnreachableDescender = [&DT, &DiscoveredConnectingEdges](NodePtr From,
NodePtr To) {
const TreeNodePtr ToTN = DT.getNode(To);
if (!ToTN) return true;
DiscoveredConnectingEdges.push_back({From, ToTN});
return false;
};
SemiNCAInfo SNCA;
SNCA.runDFS<IsPostDom>(Root, 0, UnreachableDescender, 0);
SNCA.runSemiNCA(DT);
SNCA.attachNewSubtree(DT, Incoming);
DEBUG(dbgs() << "After adding unreachable nodes\n");
DEBUG(DT.print(dbgs()));
}
// Checks if the tree contains all reachable nodes in the input graph.
@ -298,12 +539,23 @@ struct SemiNCAInfo {
for (auto &NodeToTN : DT.DomTreeNodes) {
const TreeNodePtr TN = NodeToTN.second.get();
const NodePtr BB = TN->getBlock();
// Virtual root has a corresponding virtual CFG node.
if (DT.isVirtualRoot(TN)) continue;
if (NodeToInfo.count(BB) == 0) {
errs() << "DomTree node ";
PrintBlockOrNullptr(errs(), BB);
errs() << " not found by DFS walk!\n";
errs() << "DomTree node " << BlockNamePrinter(BB)
<< " not found by DFS walk!\n";
errs().flush();
return false;
}
}
for (const NodePtr N : NumToNode) {
if (N && !DT.getNode(N)) {
errs() << "CFG node " << BlockNamePrinter(N)
<< " not found in the DomTree!\n";
errs().flush();
return false;
@ -313,6 +565,215 @@ struct SemiNCAInfo {
return true;
}
static void DeleteEdge(DomTreeT &DT, const NodePtr From, const NodePtr To) {
assert(From && To && "Cannot disconnect nullptrs");
DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> "
<< BlockNamePrinter(To) << "\n");
#ifndef NDEBUG
// Ensure that the edge was in fact deleted from the CFG before informing
// the DomTree about it.
// The check is O(N), so run it only in debug configuration.
auto IsSuccessor = [](const NodePtr SuccCandidate, const NodePtr Of) {
auto Successors = ChildrenGetter<NodePtr, IsPostDom>::Get(Of);
return llvm::find(Successors, SuccCandidate) != Successors.end();
};
(void)IsSuccessor;
assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!");
#endif
const TreeNodePtr FromTN = DT.getNode(From);
// Deletion in an unreachable subtree -- nothing to do.
if (!FromTN) return;
const TreeNodePtr ToTN = DT.getNode(To);
assert(ToTN && "To already unreachable -- there is no edge to delete");
const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To);
const TreeNodePtr NCD = DT.getNode(NCDBlock);
// To dominates From -- nothing to do.
if (ToTN == NCD) return;
const TreeNodePtr ToIDom = ToTN->getIDom();
DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
<< BlockNamePrinter(ToIDom) << "\n");
// To remains reachable after deletion.
// (Based on the caption under Figure 4. from the second paper.)
if (FromTN != ToIDom || HasProperSupport(DT, ToTN))
DeleteReachable(DT, FromTN, ToTN);
else
DeleteUnreachable(DT, ToTN);
}
// Handles deletions that leave destination nodes reachable.
static void DeleteReachable(DomTreeT &DT, const TreeNodePtr FromTN,
const TreeNodePtr ToTN) {
DEBUG(dbgs() << "Deleting reachable " << BlockNamePrinter(FromTN) << " -> "
<< BlockNamePrinter(ToTN) << "\n");
DEBUG(dbgs() << "\tRebuilding subtree\n");
// Find the top of the subtree that needs to be rebuilt.
// (Based on the lemma 2.6 from the second paper.)
const NodePtr ToIDom =
DT.findNearestCommonDominator(FromTN->getBlock(), ToTN->getBlock());
assert(ToIDom || DT.isPostDominator());
const TreeNodePtr ToIDomTN = DT.getNode(ToIDom);
assert(ToIDomTN);
const TreeNodePtr PrevIDomSubTree = ToIDomTN->getIDom();
// Top of the subtree to rebuild is the root node. Rebuild the tree from
// scratch.
if (!PrevIDomSubTree) {
DEBUG(dbgs() << "The entire tree needs to be rebuilt\n");
DT.recalculate(*DT.Parent);
return;
}
// Only visit nodes in the subtree starting at To.
const unsigned Level = ToIDomTN->getLevel();
auto DescendBelow = [Level, &DT](NodePtr, NodePtr To) {
return DT.getNode(To)->getLevel() > Level;
};
DEBUG(dbgs() << "\tTop of subtree: " << BlockNamePrinter(ToIDomTN) << "\n");
SemiNCAInfo SNCA;
SNCA.runDFS<IsPostDom>(ToIDom, 0, DescendBelow, 0);
DEBUG(dbgs() << "\tRunning Semi-NCA\n");
SNCA.runSemiNCA(DT, Level);
SNCA.reattachExistingSubtree(DT, PrevIDomSubTree);
}
// Checks if a node has proper support, as defined on page 3 and later
// explained on page 7 of the second paper.
static bool HasProperSupport(DomTreeT &DT, const TreeNodePtr TN) {
DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN) << "\n");
for (const NodePtr Pred :
ChildrenGetter<NodePtr, !IsPostDom>::Get(TN->getBlock())) {
DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n");
if (!DT.getNode(Pred)) continue;
const NodePtr Support =
DT.findNearestCommonDominator(TN->getBlock(), Pred);
DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n");
if (Support != TN->getBlock()) {
DEBUG(dbgs() << "\t" << BlockNamePrinter(TN)
<< " is reachable from support "
<< BlockNamePrinter(Support) << "\n");
return true;
}
}
return false;
}
// Handle deletions that make destination node unreachable.
// (Based on the lemma 2.7 from the second paper.)
static void DeleteUnreachable(DomTreeT &DT, const TreeNodePtr ToTN) {
DEBUG(dbgs() << "Deleting unreachable subtree " << BlockNamePrinter(ToTN)
<< "\n");
assert(ToTN);
assert(ToTN->getBlock());
SmallVector<NodePtr, 16> AffectedQueue;
const unsigned Level = ToTN->getLevel();
// Traverse destination node's descendants with greater level in the tree
// and collect visited nodes.
auto DescendAndCollect = [Level, &AffectedQueue, &DT](NodePtr, NodePtr To) {
const TreeNodePtr TN = DT.getNode(To);
assert(TN);
if (TN->getLevel() > Level) return true;
if (llvm::find(AffectedQueue, To) == AffectedQueue.end())
AffectedQueue.push_back(To);
return false;
};
SemiNCAInfo SNCA;
unsigned LastDFSNum =
SNCA.runDFS<IsPostDom>(ToTN->getBlock(), 0, DescendAndCollect, 0);
TreeNodePtr MinNode = ToTN;
// Identify the top of the subtree to rebuild by finding the NCD of all
// the affected nodes.
for (const NodePtr N : AffectedQueue) {
const TreeNodePtr TN = DT.getNode(N);
const NodePtr NCDBlock =
DT.findNearestCommonDominator(TN->getBlock(), ToTN->getBlock());
assert(NCDBlock || DT.isPostDominator());
const TreeNodePtr NCD = DT.getNode(NCDBlock);
assert(NCD);
DEBUG(dbgs() << "Processing affected node " << BlockNamePrinter(TN)
<< " with NCD = " << BlockNamePrinter(NCD)
<< ", MinNode =" << BlockNamePrinter(MinNode) << "\n");
if (NCD != TN && NCD->getLevel() < MinNode->getLevel()) MinNode = NCD;
}
// Root reached, rebuild the whole tree from scratch.
if (!MinNode->getIDom()) {
DEBUG(dbgs() << "The entire tree needs to be rebuilt\n");
DT.recalculate(*DT.Parent);
return;
}
// Erase the unreachable subtree in reverse preorder to process all children
// before deleting their parent.
for (unsigned i = LastDFSNum; i > 0; --i) {
const NodePtr N = SNCA.NumToNode[i];
const TreeNodePtr TN = DT.getNode(N);
DEBUG(dbgs() << "Erasing node " << BlockNamePrinter(TN) << "\n");
EraseNode(DT, TN);
}
// The affected subtree starts at the To node -- there's no extra work to do.
if (MinNode == ToTN) return;
DEBUG(dbgs() << "DeleteUnreachable: running DFS with MinNode = "
<< BlockNamePrinter(MinNode) << "\n");
const unsigned MinLevel = MinNode->getLevel();
const TreeNodePtr PrevIDom = MinNode->getIDom();
assert(PrevIDom);
SNCA.clear();
// Identify nodes that remain in the affected subtree.
auto DescendBelow = [MinLevel, &DT](NodePtr, NodePtr To) {
const TreeNodePtr ToTN = DT.getNode(To);
return ToTN && ToTN->getLevel() > MinLevel;
};
SNCA.runDFS<IsPostDom>(MinNode->getBlock(), 0, DescendBelow, 0);
DEBUG(dbgs() << "Previous IDom(MinNode) = " << BlockNamePrinter(PrevIDom)
<< "\nRunning Semi-NCA\n");
// Rebuild the remaining part of affected subtree.
SNCA.runSemiNCA(DT, MinLevel);
SNCA.reattachExistingSubtree(DT, PrevIDom);
}
// Removes leaf tree nodes from the dominator tree.
static void EraseNode(DomTreeT &DT, const TreeNodePtr TN) {
assert(TN);
assert(TN->getNumChildren() == 0 && "Not a tree leaf");
const TreeNodePtr IDom = TN->getIDom();
assert(IDom);
auto ChIt = llvm::find(IDom->Children, TN);
assert(ChIt != IDom->Children.end());
std::swap(*ChIt, IDom->Children.back());
IDom->Children.pop_back();
DT.DomTreeNodes.erase(TN->getBlock());
}
//~~
//===--------------- DomTree correctness verification ---------------------===
//~~
// Check if for every parent with a level L in the tree all of its children
// have level L + 1.
static bool VerifyLevels(const DomTreeT &DT) {
@ -323,20 +784,18 @@ struct SemiNCAInfo {
const TreeNodePtr IDom = TN->getIDom();
if (!IDom && TN->getLevel() != 0) {
errs() << "Node without an IDom ";
PrintBlockOrNullptr(errs(), BB);
errs() << " has a nonzero level " << TN->getLevel() << "!\n";
errs() << "Node without an IDom " << BlockNamePrinter(BB)
<< " has a nonzero level " << TN->getLevel() << "!\n";
errs().flush();
return false;
}
if (IDom && TN->getLevel() != IDom->getLevel() + 1) {
errs() << "Node ";
PrintBlockOrNullptr(errs(), BB);
errs() << " has level " << TN->getLevel() << " while it's IDom ";
PrintBlockOrNullptr(errs(), IDom->getBlock());
errs() << " has level " << IDom->getLevel() << "!\n";
errs() << "Node " << BlockNamePrinter(BB) << " has level "
<< TN->getLevel() << " while its IDom "
<< BlockNamePrinter(IDom->getBlock()) << " has level "
<< IDom->getLevel() << "!\n";
errs().flush();
return false;
@ -363,18 +822,14 @@ struct SemiNCAInfo {
assert(ToTN);
const NodePtr NCD = DT.findNearestCommonDominator(From, To);
const TreeNodePtr NCDTN = DT.getNode(NCD);
const TreeNodePtr ToIDom = ToTN->getIDom();
if (NCDTN != ToTN && NCDTN != ToIDom) {
errs() << "NearestCommonDominator verification failed:\n\tNCD(From:";
PrintBlockOrNullptr(errs(), From);
errs() << ", To:";
PrintBlockOrNullptr(errs(), To);
errs() << ") = ";
PrintBlockOrNullptr(errs(), NCD);
errs() << ",\t (should be To or IDom[To]: ";
PrintBlockOrNullptr(errs(), ToIDom ? ToIDom->getBlock() : nullptr);
errs() << ")\n";
errs() << "NearestCommonDominator verification failed:\n\tNCD(From:"
<< BlockNamePrinter(From) << ", To:" << BlockNamePrinter(To)
<< ") = " << BlockNamePrinter(NCD)
<< ",\t (should be To or IDom[To]: " << BlockNamePrinter(ToIDom)
<< ")\n";
errs().flush();
return false;
@ -440,11 +895,9 @@ struct SemiNCAInfo {
for (TreeNodePtr Child : TN->getChildren())
if (NodeToInfo.count(Child->getBlock()) != 0) {
errs() << "Child ";
PrintBlockOrNullptr(errs(), Child->getBlock());
errs() << " reachable after its parent ";
PrintBlockOrNullptr(errs(), BB);
errs() << " is removed!\n";
errs() << "Child " << BlockNamePrinter(Child)
<< " reachable after its parent " << BlockNamePrinter(BB)
<< " is removed!\n";
errs().flush();
return false;
@ -477,11 +930,9 @@ struct SemiNCAInfo {
if (S == N) continue;
if (NodeToInfo.count(S->getBlock()) == 0) {
errs() << "Node ";
PrintBlockOrNullptr(errs(), S->getBlock());
errs() << " not reachable when its sibling ";
PrintBlockOrNullptr(errs(), N->getBlock());
errs() << " is removed!\n";
errs() << "Node " << BlockNamePrinter(S)
<< " not reachable when its sibling " << BlockNamePrinter(N)
<< " is removed!\n";
errs().flush();
return false;
@ -494,23 +945,30 @@ struct SemiNCAInfo {
}
};
template <class DomTreeT, class FuncT>
void Calculate(DomTreeT &DT, FuncT &F) {
SemiNCAInfo<DomTreeT> SNCA;
SNCA.calculateFromScratch(DT, GraphTraits<FuncT *>::size(&F));
}
template <class DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To) {
if (DT.isPostDominator()) std::swap(From, To);
SemiNCAInfo<DomTreeT>::InsertEdge(DT, From, To);
}
template <class DomTreeT>
void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To) {
if (DT.isPostDominator()) std::swap(From, To);
SemiNCAInfo<DomTreeT>::DeleteEdge(DT, From, To);
}
template <class DomTreeT>
bool Verify(const DomTreeT &DT) {
SemiNCAInfo<DomTreeT> SNCA;
return SNCA.verifyReachability(DT) && SNCA.VerifyLevels(DT) &&
SNCA.verifyNCD(DT) && SNCA.verifyParentProperty(DT) &&
SNCA.verifySiblingProperty(DT);
@ -519,4 +977,6 @@ bool Verify(const DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT) {
} // namespace DomTreeBuilder
} // namespace llvm
#undef DEBUG_TYPE
#endif
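As a standalone aside, not part of the merged header above: the toy program below shows what the dominator tree encodes, using the textbook iterative data-flow formulation on a hard-coded diamond CFG instead of the Semi-NCA algorithm implemented here; every name in it is invented for the example.

#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  // Tiny diamond CFG: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3, with 0 as the entry.
  const int N = 4;
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};

  // Dom(entry) = {entry}; Dom(v) = {v} united with the intersection of
  // Dom(p) over all predecessors p. Iterate until nothing changes.
  std::vector<std::bitset<4>> Dom(N);
  for (int V = 0; V < N; ++V) Dom[V].set();
  Dom[0].reset();
  Dom[0].set(0);

  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (int V = 1; V < N; ++V) {
      std::bitset<4> NewDom;
      NewDom.set();
      for (int P : Preds[V]) NewDom &= Dom[P];
      NewDom.set(V);
      if (NewDom != Dom[V]) { Dom[V] = NewDom; Changed = true; }
    }
  }

  // The immediate dominator of V is its closest strict dominator, i.e. the
  // strict dominator that itself has the largest dominator set.
  for (int V = 1; V < N; ++V) {
    int IDom = 0;
    for (int D = 0; D < N; ++D)
      if (D != V && Dom[V][D] && Dom[D].count() > Dom[IDom].count()) IDom = D;
    std::printf("idom(%d) = %d\n", V, IDom);
  }
}

For this graph it prints idom(1) = 0, idom(2) = 0, and idom(3) = 0, which matches the tree calculateFromScratch would produce for the same CFG; the Semi-NCA code above just gets there far more efficiently and can additionally patch the tree when single edges are inserted or deleted.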


@ -85,6 +85,7 @@ enum ArchExtKind : unsigned {
AEK_DSP = 0x400,
AEK_FP16 = 0x800,
AEK_RAS = 0x1000,
AEK_SVE = 0x2000,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,
@ -166,7 +167,8 @@ enum ArchExtKind : unsigned {
AEK_FP16 = 0x20,
AEK_PROFILE = 0x40,
AEK_RAS = 0x80,
AEK_LSE = 0x100,
AEK_SVE = 0x200
};
StringRef getCanonicalArchName(StringRef Arch);
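Since the AEK_* values are single-bit flags, an extension set fits in one unsigned and is combined and queried with ordinary bit operations. The snippet below is only an illustrative sketch (the CPU description is hypothetical), not code from this patch.

#include "llvm/Support/TargetParser.h"
#include <cstdio>

int main() {
  // Hypothetical AArch64 CPU: FP16 + RAS + the newly added SVE extension.
  unsigned Exts = llvm::AArch64::AEK_FP16 | llvm::AArch64::AEK_RAS |
                  llvm::AArch64::AEK_SVE;
  // Each AEK_* value is a distinct bit, so membership is a simple mask test.
  std::printf("has SVE: %d\n", (Exts & llvm::AArch64::AEK_SVE) != 0 ? 1 : 0);
}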


@ -1114,6 +1114,10 @@ class Input : public IO {
void *Ctxt = nullptr,
SourceMgr::DiagHandlerTy DiagHandler = nullptr,
void *DiagHandlerCtxt = nullptr);
Input(MemoryBufferRef Input,
void *Ctxt = nullptr,
SourceMgr::DiagHandlerTy DiagHandler = nullptr,
void *DiagHandlerCtxt = nullptr);
~Input() override;
// Check if there was a syntax or semantic error during parsing.
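A hedged usage sketch of the new MemoryBufferRef overload, not taken from the diff: the Config struct, its YAML mapping, and the field names are invented for illustration.

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include <string>

struct Config {
  std::string Name;
  int Threads = 1;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Config> {
  static void mapping(IO &Io, Config &C) {
    Io.mapRequired("name", C.Name);
    Io.mapOptional("threads", C.Threads);
  }
};
} // namespace yaml
} // namespace llvm

// Parse a YAML document straight out of a MemoryBufferRef (the overload added
// above); returns false if Input recorded a syntax or semantic error.
static bool parseConfig(llvm::MemoryBufferRef Buffer, Config &C) {
  llvm::yaml::Input Yin(Buffer);
  Yin >> C;
  return !Yin.error();
}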


@ -58,6 +58,7 @@ def : GINodeEquiv<G_SITOFP, sint_to_fp>;
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
def : GINodeEquiv<G_FADD, fadd>;
def : GINodeEquiv<G_FSUB, fsub>;
def : GINodeEquiv<G_FMA, fma>;
def : GINodeEquiv<G_FMUL, fmul>;
def : GINodeEquiv<G_FDIV, fdiv>;
def : GINodeEquiv<G_FREM, frem>;


@ -2012,6 +2012,35 @@ class TargetLoweringBase {
return isExtFreeImpl(I);
}
/// Return true if \p Load and \p Ext can form an ExtLoad.
/// For example, in AArch64
/// %L = load i8, i8* %ptr
/// %E = zext i8 %L to i32
/// can be lowered into one load instruction
/// ldrb w0, [x0]
bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
const DataLayout &DL) const {
EVT VT = getValueType(DL, Ext->getType());
EVT LoadVT = getValueType(DL, Load->getType());
// If the load has other users and the truncate is not free, the ext
// probably isn't free.
if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
!isTruncateFree(Ext->getType(), Load->getType()))
return false;
// Check whether the target supports casts folded into loads.
unsigned LType;
if (isa<ZExtInst>(Ext))
LType = ISD::ZEXTLOAD;
else {
assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
return isLoadExtLegal(LType, VT, LoadVT);
}
/// Return true if any actual instruction that defines a value of type FromTy
/// implicitly zero-extends the value to ToTy in the result register.
///


@ -0,0 +1,24 @@
//===- DlltoolDriver.h - dlltool.exe-compatible driver ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines an interface to a dlltool.exe-compatible driver.
// Used by llvm-dlltool.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H
#define LLVM_TOOLDRIVERS_LLVM_DLLTOOL_DLLTOOLDRIVER_H
namespace llvm {
template <typename T> class ArrayRef;
int dlltoolDriverMain(ArrayRef<const char *> ArgsArr);
} // namespace llvm
#endif
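A hypothetical caller, not part of this patch, showing how a tool's main might hand its arguments to the driver declared here; copying into a SmallVector is just one convenient way to obtain an ArrayRef<const char *>.

#include "llvm/ADT/SmallVector.h"
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"

int main(int argc, char **argv) {
  // dlltoolDriverMain expects the whole argv range, including the tool name.
  llvm::SmallVector<const char *, 16> Args(argv, argv + argc);
  return llvm::dlltoolDriverMain(Args);
}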


@ -433,7 +433,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (Visited.insert(C).second)
Worklist.push_back(C);
auto VisitRef = [&](Function &Referee) {
Node &RefereeN = *G.lookup(Referee);
Edge *E = N->lookup(RefereeN);
// FIXME: Similarly to new calls, we also currently preclude
@ -444,7 +444,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
RetainedEdges.insert(&RefereeN);
if (E->isCall())
DemotedCallTargets.insert(&RefereeN);
};
LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);
// Include synthetic reference edges to known, defined lib functions.
for (auto *F : G.getLibFunctions())
VisitRef(*F);
// First remove all of the edges that are no longer present in this function.
// We have to build a list of dead targets first and then remove them as the


@ -14,7 +14,8 @@
using namespace llvm;
namespace llvm {
template class DominanceFrontierBase<BasicBlock, false>;
template class DominanceFrontierBase<BasicBlock, true>;
template class ForwardDominanceFrontierBase<BasicBlock>;
}


@ -26,7 +26,6 @@ using namespace llvm;
STATISTIC(TotalInsts , "Number of instructions (of all types)");
STATISTIC(TotalBlocks, "Number of basic blocks");
STATISTIC(TotalFuncs , "Number of non-external functions");
STATISTIC(TotalMemInst, "Number of memory instructions");
#define HANDLE_INST(N, OPCODE, CLASS) \
STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
@ -75,13 +74,6 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
// function.
//
bool InstCount::runOnFunction(Function &F) {
unsigned StartMemInsts =
NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
NumInvokeInst + NumAllocaInst;
visit(F);
unsigned EndMemInsts =
NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
NumInvokeInst + NumAllocaInst;
TotalMemInst += EndMemInsts-StartMemInsts;
return false;
}


@ -1745,14 +1745,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
return Op1;
// A & (A | ?) = A
if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
// A mask that only clears known zeros of a shifted value is a no-op.
@ -1852,26 +1849,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Constant::getAllOnesValue(Op0->getType());
// (A & ?) | A = A
if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
return Op1;
// A | (A & ?) = A
if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
return Op0;
// ~(A & ?) | A = -1
if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
return Constant::getAllOnesValue(Op1->getType());
// A | ~(A & ?) = -1
if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()))))
return Constant::getAllOnesValue(Op0->getType());
Value *A, *B;
// (A & ~B) | (A ^ B) -> (A ^ B)
// (~B & A) | (A ^ B) -> (A ^ B)
// (A & ~B) | (B ^ A) -> (B ^ A)
