diff --git a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 559fb40773aa..a77cf04ea4d1 100644 --- a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -21,8 +21,8 @@ // class MyClass : public RefCountedBase {}; // // void foo() { -// // Objects that inherit from RefCountedBase should always be instantiated -// // on the heap, never on the stack. +// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by +// // 1 (from 0 in this case). // IntrusiveRefCntPtr Ptr1(new MyClass()); // // // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1. @@ -68,9 +68,6 @@ namespace llvm { /// calls to Release() and Retain(), which increment and decrement the object's /// refcount, respectively. When a Release() call decrements the refcount to 0, /// the object deletes itself. -/// -/// Objects that inherit from RefCountedBase should always be allocated with -/// operator new. template class RefCountedBase { mutable unsigned RefCount = 0; diff --git a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h index c0b4709e98f8..3198dd438700 100644 --- a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h +++ b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include @@ -107,6 +108,39 @@ public: return false; } + /// Insert a sequence of new elements into the PriorityWorklist. + template + typename std::enable_if::value>::type + insert(SequenceT &&Input) { + if (std::begin(Input) == std::end(Input)) + // Nothing to do for an empty input sequence. + return; + + // First pull the input sequence into the vector as a bulk append + // operation. + ptrdiff_t StartIndex = V.size(); + V.insert(V.end(), std::begin(Input), std::end(Input)); + // Now walk backwards fixing up the index map and deleting any duplicates. + for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) { + auto InsertResult = M.insert({V[i], i}); + if (InsertResult.second) + continue; + + // If the existing index is before this insert's start, nuke that one and + // move it up. + ptrdiff_t &Index = InsertResult.first->second; + if (Index < StartIndex) { + V[Index] = T(); + Index = i; + continue; + } + + // Otherwise the existing one comes first so just clear out the value in + // this slot. + V[i] = T(); + } + } + /// Remove the last element of the PriorityWorklist. void pop_back() { assert(!empty() && "Cannot remove an element when empty!"); @@ -169,6 +203,11 @@ public: return true; } + /// Reverse the items in the PriorityWorklist. + /// + /// This does an in-place reversal. Other kinds of reverse aren't easy to + /// support in the face of the worklist semantics. + /// Completely clear the PriorityWorklist void clear() { M.clear(); diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h index 139bf3c2116f..e167f36219d2 100644 --- a/contrib/llvm/include/llvm/Analysis/Loads.h +++ b/contrib/llvm/include/llvm/Analysis/Loads.h @@ -23,10 +23,9 @@ namespace llvm { class DataLayout; class MDNode; -/// isDereferenceablePointer - Return true if this is always a dereferenceable -/// pointer. 
If the context instruction is specified perform context-sensitive -/// analysis and return true if the pointer is dereferenceable at the -/// specified instruction. +/// Return true if this is always a dereferenceable pointer. If the context +/// instruction is specified perform context-sensitive analysis and return true +/// if the pointer is dereferenceable at the specified instruction. bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. +/// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive /// analysis and returns true if it is safe to load immediately before ScanFrom. @@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); -/// DefMaxInstsToScan - the default number of maximum instructions -/// to scan in the block, used by FindAvailableLoadedValue(). +/// The default number of maximum instructions to scan in the block, used by +/// FindAvailableLoadedValue(). extern cl::opt DefMaxInstsToScan; -/// \brief Scan backwards to see if we have the value of the given load -/// available locally within a small number of instructions. +/// Scan backwards to see if we have the value of the given load available +/// locally within a small number of instructions. /// /// You can use this function to scan across multiple blocks: after you call /// this function, if ScanFrom points at the beginning of the block, it's safe diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index c1be46ddd7b5..be8822df3dba 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -208,6 +208,8 @@ public: SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + + void emit(int, MCStreamer *, const MCSymbol *) const; }; // All the sleds to be emitted. @@ -216,6 +218,9 @@ public: // Helper function to record a given XRay sled. void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + /// Emit a table with all XRay instrumentation points. + void emitXRayTable(); + //===------------------------------------------------------------------===// // MachineFunctionPass Implementation. //===------------------------------------------------------------------===// diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h index 76e1df89169e..21ecef587aa5 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h @@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet NewBBs; + /// The DominatorTreeBase that is used to compute a normal dominator tree + DominatorTreeBase* DT; + /// \brief Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses /// the fast query path of DT as much as possible. 
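The PriorityWorklist::insert overload added earlier in this patch relies on a bulk-append-then-backwards-fixup strategy. A minimal stand-alone sketch of the same algorithm, using std::vector and std::unordered_map in place of SmallVector and DenseMap (BulkWorklist and insertRange are names local to this sketch, and T() is assumed to be a reserved "dead slot" value, as in the real class):

#include <cstddef>
#include <iterator>
#include <unordered_map>
#include <vector>

template <typename T> class BulkWorklist {
  std::vector<T> V;                        // insertion-ordered storage
  std::unordered_map<T, std::ptrdiff_t> M; // value -> index of its live slot

public:
  template <typename Range> void insertRange(const Range &Input) {
    // Bulk-append first, then walk backwards deduplicating, so the newest
    // occurrence of each value keeps the highest (latest) priority.
    std::ptrdiff_t StartIndex = V.size();
    V.insert(V.end(), std::begin(Input), std::end(Input));
    for (std::ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) {
      auto InsertResult = M.insert({V[i], i});
      if (InsertResult.second)
        continue;               // first sighting of this value
      std::ptrdiff_t &Index = InsertResult.first->second;
      if (Index < StartIndex) { // older copy predates this insert: kill it
        V[Index] = T();
        Index = i;
        continue;
      }
      V[i] = T();               // a later duplicate already claimed the slot
    }
  }
};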
@@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass { public: static char ID; // Pass ID, replacement for typeid - DominatorTreeBase* DT; MachineDominatorTree(); diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index ca9a6c822876..878f1c76ebf6 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -116,12 +116,12 @@ public: // An unsigned integer indicating the identity of the source file // corresponding to a machine instruction. uint16_t File; - // An unsigned integer whose value encodes the applicable instruction set - // architecture for the current instruction. - uint8_t Isa; // An unsigned integer representing the DWARF path discriminator value // for this location. uint32_t Discriminator; + // An unsigned integer whose value encodes the applicable instruction set + // architecture for the current instruction. + uint8_t Isa; // A boolean indicating that the current instruction is the beginning of a // statement. uint8_t IsStmt:1, diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 078959ce15d0..07d5b5ea40dc 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id : // Instruction Intrinsics //===----------------------------------------------------------------------===// +// The first parameter is s_sendmsg immediate (i16), +// the second one is copied to m0 +def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; +def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td index 3a496cb6645c..85966af9c820 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } -// Vector extract and insert -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
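These extract/insert intrinsic definitions can be deleted because the AutoUpgrade.cpp hunk later in this patch (the "Added in 4.0" entries) rewrites calls to them as plain shufflevectors plus a select for the mask. A stand-alone sketch of the lane-index math that upgrade uses, where extractLaneMask is a hypothetical helper (for example, lane Imm = 1 of a 512-bit vector viewed as four 128-bit float lanes yields indices {4, 5, 6, 7}):

#include <cstdint>
#include <vector>

// DstNumElts: elements in the extracted subvector; SrcNumElts: elements in
// the source vector. Mirrors the shuffle mask built in UpgradeIntrinsicCall.
std::vector<uint32_t> extractLaneMask(unsigned DstNumElts, unsigned SrcNumElts,
                                      unsigned Imm) {
  Imm %= SrcNumElts / DstNumElts; // hardware ignores the high immediate bits
  std::vector<uint32_t> Idxs(DstNumElts);
  for (unsigned i = 0; i != DstNumElts; ++i)
    Idxs[i] = i + Imm * DstNumElts;
  return Idxs;
}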
- def int_x86_avx512_mask_vextractf32x4_512 : - GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_512 : - GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x4_256 : - GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_256 : - GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_256 : - GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_256 : - GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_512 : - GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_512 : - GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x8_512 : - GCCBuiltin<"__builtin_ia32_extractf32x8_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x8_512 : - GCCBuiltin<"__builtin_ia32_extracti32x8_mask">, - Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x4_512 : - GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x4_512 : - GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_256 : - GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_512 : - GCCBuiltin<"__builtin_ia32_insertf32x4_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x8_512 : - GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_256 : - GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_512 : - GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x4_512 : - 
GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_256 : - GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_512 : - GCCBuiltin<"__builtin_ia32_inserti32x4_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x8_512 : - GCCBuiltin<"__builtin_ia32_inserti32x8_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_256 : - GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_512 : - GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x4_512 : - GCCBuiltin<"__builtin_ia32_inserti64x4_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; -} - // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h index 9d8d8c3ffb5c..347f21108913 100644 --- a/contrib/llvm/include/llvm/Support/FileSystem.h +++ b/contrib/llvm/include/llvm/Support/FileSystem.h @@ -769,17 +769,13 @@ namespace detail { std::error_code directory_iterator_increment(DirIterState &); std::error_code directory_iterator_destruct(DirIterState &); - /// DirIterState - Keeps state for the directory_iterator. It is reference - /// counted in order to preserve InputIterator semantics on copy. - struct DirIterState : public RefCountedBase { - DirIterState() - : IterationHandle(0) {} - + /// Keeps state for the directory_iterator. + struct DirIterState { ~DirIterState() { directory_iterator_destruct(*this); } - intptr_t IterationHandle; + intptr_t IterationHandle = 0; directory_entry CurrentEntry; }; } // end namespace detail @@ -788,23 +784,23 @@ namespace detail { /// operator++ because we need an error_code. If it's really needed we can make /// it call report_fatal_error on error. class directory_iterator { - IntrusiveRefCntPtr State; + std::shared_ptr State; public: explicit directory_iterator(const Twine &path, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); SmallString<128> path_storage; ec = detail::directory_iterator_construct(*State, path.toStringRef(path_storage)); } explicit directory_iterator(const directory_entry &de, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); ec = detail::directory_iterator_construct(*State, de.path()); } /// Construct end iterator. - directory_iterator() : State(nullptr) {} + directory_iterator() = default; // No operator++ because we need error_code. 
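Since copies of a directory_iterator still share one DirIterState, swapping IntrusiveRefCntPtr for std::shared_ptr preserves the InputIterator copy semantics. A usage sketch of the iterator with the increment() member below (listDir is a name local to this sketch):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

// Print every entry in Dir, stopping on the first error.
void listDir(const llvm::Twine &Dir) {
  std::error_code EC;
  for (llvm::sys::fs::directory_iterator It(Dir, EC), End;
       It != End && !EC; It.increment(EC))
    llvm::outs() << It->path() << "\n";
}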
directory_iterator &increment(std::error_code &ec) { diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h index 38acb36942bc..cbba9c08275a 100644 --- a/contrib/llvm/include/llvm/Support/YAMLTraits.h +++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h @@ -209,6 +209,15 @@ struct DocumentListTraits { // static T::value_type& element(IO &io, T &seq, size_t index); }; +/// This class should be specialized by any type that needs to be converted +/// to/from a YAML mapping in the case where the names of the keys are not known +/// in advance, e.g. a string map. +template +struct CustomMappingTraits { + // static void inputOne(IO &io, StringRef key, T &elem); + // static void output(IO &io, T &elem); +}; + // Only used for better diagnostics of missing traits template struct MissingTrait; @@ -358,6 +367,23 @@ public: static bool const value = (sizeof(test>(nullptr)) == 1); }; +// Test if CustomMappingTraits is defined on type T. +template +struct has_CustomMappingTraits +{ + typedef void (*Signature_input)(IO &io, StringRef key, T &v); + + template + static char test(SameType*); + + template + static double test(...); + +public: + static bool const value = + (sizeof(test>(nullptr)) == 1); +}; + // has_FlowTraits will cause an error with some compilers because // it subclasses int. Using this wrapper only instantiates the // real has_FlowTraits only if the template type is a class. @@ -493,6 +519,7 @@ struct missingTraits !has_BlockScalarTraits::value && !has_MappingTraits::value && !has_SequenceTraits::value && + !has_CustomMappingTraits::value && !has_DocumentListTraits::value> {}; template @@ -531,6 +558,7 @@ public: virtual void endMapping() = 0; virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0; virtual void postflightKey(void*) = 0; + virtual std::vector keys() = 0; virtual void beginFlowMapping() = 0; virtual void endFlowMapping() = 0; @@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { } } +template +typename std::enable_if::value, void>::type +yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { + if ( io.outputting() ) { + io.beginMapping(); + CustomMappingTraits::output(io, Val); + io.endMapping(); + } else { + io.beginMapping(); + for (StringRef key : io.keys()) + CustomMappingTraits::inputOne(io, key, Val); + io.endMapping(); + } +} + template typename std::enable_if::value, void>::type yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { @@ -1074,6 +1117,7 @@ private: void endMapping() override; bool preflightKey(const char *, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1154,10 +1198,8 @@ private: typedef llvm::StringMap> NameToNode; - bool isValidKey(StringRef key); - NameToNode Mapping; - llvm::SmallVector ValidKeys; + llvm::SmallVector ValidKeys; }; class SequenceHNode : public HNode { @@ -1215,6 +1257,7 @@ public: void endMapping() override; bool preflightKey(const char *key, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) { return In; } +// Define non-member operator>> so that Input can stream in a string map. 
+template +inline +typename std::enable_if::value, Input &>::type +operator>>(Input &In, T &Val) { + EmptyContext Ctx; + if (In.setCurrentDocument()) + yamlize(In, Val, true, Ctx); + return In; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) { return Out; } +// Define non-member operator<< so that Output can stream out a string map. +template +inline +typename std::enable_if::value, Output &>::type +operator<<(Output &Out, T &Val) { + EmptyContext Ctx; + Out.beginDocuments(); + if (Out.preflightDocument(0)) { + yamlize(Out, Val, true, Ctx); + Out.postflightDocument(); + } + Out.endDocuments(); + return Out; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1476,6 +1545,18 @@ template struct SequenceTraitsImpl { } }; +/// Implementation of CustomMappingTraits for std::map. +template struct StdMapStringCustomMappingTraitsImpl { + typedef std::map map_type; + static void inputOne(IO &io, StringRef key, map_type &v) { + io.mapRequired(key.str().c_str(), v[key]); + } + static void output(IO &io, map_type &v) { + for (auto &p : v) + io.mapRequired(p.first.c_str(), p.second); + } +}; + } // end namespace yaml } // end namespace llvm @@ -1530,4 +1611,15 @@ template struct SequenceTraitsImpl { } \ } +/// Utility for declaring that std::map should be considered +/// a YAML map. +#define LLVM_YAML_IS_STRING_MAP(_type) \ + namespace llvm { \ + namespace yaml { \ + template <> \ + struct CustomMappingTraits> \ + : public StdMapStringCustomMappingTraitsImpl<_type> {}; \ + } \ + } + #endif // LLVM_SUPPORT_YAMLTRAITS_H diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 2a77baec6c36..073b4e6ab26a 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. @@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeNegativeZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index cd08268d47b5..5da421a79b7b 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // handles the case where this is type ODRed with a definition needed // by the importing module, in which case the existing definition is // used. 
- if (IsImporting && !ImportFullTypeDefinitions && + if (IsImporting && !ImportFullTypeDefinitions && Identifier && (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 0678bce449ed..79ecc4308fe7 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V, TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); NoopInput = Op; - } else if (isa(I)) { - // Look through call (skipping callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } - } else if (isa(I)) { - // Look through invoke (skipping BB, BB, Callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } + } else if (auto CS = ImmutableCallSite(I)) { + const Value *ReturnedOp = CS.getReturnedArgOperand(); + if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI)) + NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast(V)) { // Value may come from either the aggregate or the scalar ArrayRef InsertLoc = IVI->getIndices(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index de0a4f0befa1..5f15ac1d503b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" @@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. 
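Reading the record layout off the emit() routine that follows, each table entry on a 64-bit target (Bytes == 8) occupies 32 bytes. The struct below is a descriptive sketch of that layout only; the field names are not from LLVM, and 32-bit targets shrink the two symbol values to 4 bytes each:

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t Address;         // EmitSymbolValue(Sled, Bytes)
  uint64_t Function;        // EmitSymbolValue(CurrentFnSym, Bytes)
  uint8_t Kind;             // SledKind narrowed to one byte
  uint8_t AlwaysInstrument; // boolean flag
  uint8_t Padding[14];      // EmitZeros(2 * Bytes - 2)
};
static_assert(sizeof(XRaySledEntry64) == 32, "record is 4 * Bytes long");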
+void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast(Kind); + Out->EmitBytes(StringRef(reinterpret_cast(&Kind8), 1)); + Out->EmitBytes( + StringRef(reinterpret_cast(&AlwaysInstrument), 1)); + Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + if (Fn->hasComdat()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + } else { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + } + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since this function is always called just + // before the function's end, we assume that this is happening after + // the last return instruction. + + auto WordSizeBytes = TM.getPointerSize(); + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind) { auto Fn = MI.getParent()->getParent()->getFunction(); diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 422f2dc2f2fb..3d81184f774a 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. 
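The loop continuing below uses Orders itself as the BFS queue: it indexes into the vector while growing it, so every dominator-tree node is visited after its parent. A minimal stand-alone sketch of the pattern, omitting the WorkSet filtering (TreeNode and topDownOrder are local to this sketch):

#include <cstddef>
#include <vector>

struct TreeNode {
  std::vector<TreeNode *> Children;
};

std::vector<TreeNode *> topDownOrder(TreeNode *Root) {
  std::vector<TreeNode *> Orders{Root};
  // Orders doubles as the worklist; idx chases the growing end.
  for (std::size_t idx = 0; idx != Orders.size(); ++idx)
    for (TreeNode *Child : Orders[idx]->Children)
      Orders.push_back(Child);
  return Orders;
}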
unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector &Children = Node->getChildren(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b4b41c3d0011..4632484055d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4277,7 +4277,8 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { bool IsIndexSignExt = false; // Split up a folded GlobalAddress+Offset into its component parts. @@ -4286,7 +4287,7 @@ struct BaseIndexOffset { return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), SDLoc(GA), GA->getValueType(0), - /*Offset=*/0, + /*Offset=*/PartialOffset, /*isTargetGA=*/false, GA->getTargetFlags()), SDValue(), @@ -4298,14 +4299,13 @@ struct BaseIndexOffset { // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // We know that we have at least an ADD instruction. Try to pattern match // the simple case of BASE + OFFSET. if (isa(Ptr->getOperand(1))) { int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); } // Inside a loop the current BASE pointer is calculated using an ADD and a @@ -4314,7 +4314,7 @@ struct BaseIndexOffset { // (i64 mul (i64 %induction_var) // (i64 %element_size))) if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); @@ -4328,14 +4328,14 @@ struct BaseIndexOffset { // Either the case of Base + Index (no offset) or something else. if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); // Now we have the case of Base + Index + offset. SDValue Index = IndexOffset->getOperand(0); SDValue Offset = IndexOffset->getOperand(1); if (!isa(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Ignore signextends. 
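The PartialOffset parameter threaded through match() above lets nested (Base + C1) + C2 chains collapse into a single accumulated offset via recursion, instead of stopping at the first constant. A stand-alone sketch of that accumulation, where Expr and decompose are hypothetical stand-ins for the SDValue machinery (decompose of (X + 8) + 4 yields {X, 12} rather than {X + 8, 4}):

#include <cstdint>
#include <utility>

struct Expr {
  const Expr *Base = nullptr; // non-null: this node is Base + Constant
  int64_t Constant = 0;
};

std::pair<const Expr *, int64_t> decompose(const Expr &E,
                                           int64_t PartialOffset = 0) {
  if (E.Base) // an ADD with a constant operand: recurse and accumulate
    return decompose(*E.Base, E.Constant + PartialOffset);
  return {&E, PartialOffset}; // opaque base: report what was folded so far
}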
if (Index->getOpcode() == ISD::SIGN_EXTEND) { @@ -4344,7 +4344,7 @@ struct BaseIndexOffset { } else IsIndexSignExt = false; int64_t Off = cast(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); } }; } // namespace diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 324d07118704..57b5d85bb550 100644 --- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted( // Use symbol info to iterate functions in the object. for (const std::pair &P : computeSymbolSizes(DebugObj)) { SymbolRef Sym = P.first; - if (Sym.getType() != SymbolRef::ST_Function) + if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function) continue; - ErrorOr NameOrErr = Sym.getName(); - if (NameOrErr.getError()) + Expected NameOrErr = Sym.getName(); + if (!NameOrErr) continue; StringRef Name = *NameOrErr; - ErrorOr AddrOrErr = Sym.getAddress(); - if (AddrOrErr.getError()) + Expected AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; uint64_t Size = P.second; @@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { for (symbol_iterator I = DebugObj.symbol_begin(), E = DebugObj.symbol_end(); I != E; ++I) { - if (I->getType() == SymbolRef::ST_Function) { - ErrorOr AddrOrErr = I->getAddress(); - if (AddrOrErr.getError()) + if (I->getType() && *I->getType() == SymbolRef::ST_Function) { + Expected AddrOrErr = I->getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp index 2d9d0f95efa5..a87b9bec1ed2 100644 --- a/contrib/llvm/lib/IR/AutoUpgrade.cpp +++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp @@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 Name.startswith("avx.vinsertf128.") || // Added in 3.7 Name == "avx2.vinserti128" || // Added in 3.7 + Name.startswith("avx512.mask.insert") || // Added in 4.0 Name.startswith("avx.vextractf128.") || // Added in 3.7 Name == "avx2.vextracti128" || // Added in 3.7 + Name.startswith("avx512.mask.vextract") || // Added in 4.0 Name.startswith("sse4a.movnt.") || // Added in 3.9 Name.startswith("avx.movnt.") || // Added in 3.2 Name.startswith("avx512.storent.") || // Added in 3.9 @@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || - Name == "avx2.vinserti128")) { + Name == "avx2.vinserti128" || + Name.startswith("avx512.mask.insert"))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); + unsigned Scale = DstNumElts / SrcNumElts; // Mask off the high bits of the immediate value; hardware ignores those. - Imm = Imm & 1; + Imm = Imm % Scale; - // Extend the second operand into a vector that is twice as big. 
+ // Extend the second operand into a vector the size of the destination. Value *UndefV = UndefValue::get(Op1->getType()); - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != SrcNumElts; ++i) Idxs[i] = i; + for (unsigned i = SrcNumElts; i != DstNumElts; ++i) + Idxs[i] = SrcNumElts; Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); // Insert the second operand into the first operand. @@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Imm = 1 // Imm = 0 - // The low half of the result is either the low half of the 1st operand - // or the low half of the 2nd operand (the inserted vector). - for (unsigned i = 0; i != NumElts / 2; ++i) - Idxs[i] = Imm ? i : (i + NumElts); - // The high half of the result is either the low half of the 2nd operand - // (the inserted vector) or the high half of the 1st operand. - for (unsigned i = NumElts / 2; i != NumElts; ++i) - Idxs[i] = Imm ? (i + NumElts / 2) : i; + // First fill with identify mask. + for (unsigned i = 0; i != DstNumElts; ++i) + Idxs[i] = i; + // Then replace the elements where we need to insert. + for (unsigned i = 0; i != SrcNumElts; ++i) + Idxs[i + Imm * SrcNumElts] = i + DstNumElts; Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); + + // If the intrinsic has a mask operand, handle that. + if (CI->getNumArgOperands() == 5) + Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, + CI->getArgOperand(3)); } else if (IsX86 && (Name.startswith("avx.vextractf128.") || - Name == "avx2.vextracti128")) { + Name == "avx2.vextracti128" || + Name.startswith("avx512.mask.vextract"))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); + unsigned Scale = SrcNumElts / DstNumElts; // Mask off the high bits of the immediate value; hardware ignores those. - Imm = Imm & 1; + Imm = Imm % Scale; - // Get indexes for either the high half or low half of the input vector. - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - Idxs[i] = Imm ? (i + NumElts) : i; + // Get indexes for the subvector of the input vector. + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != DstNumElts; ++i) { + Idxs[i] = i + (Imm * DstNumElts); } + Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); - Value *UndefV = UndefValue::get(Op0->getType()); - Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); + // If the intrinsic has a mask operand, handle that. + if (CI->getNumArgOperands() == 4) + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index 7364f0e0cd31..42b3a344352b 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); - // Partition numbers for ThinLTO jobs start at 1 (see comments for - // GlobalResolution in LTO.h). 
Task numbers, however, start at - // ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0 - // through ParallelCodeGenParallelismLevel-1 are reserved for parallel code - // generation partitions. + // Task numbers start at ParallelCodeGenParallelismLevel if an LTO + // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1 + // are reserved for parallel code generation partitions. unsigned Task = HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0; - unsigned Partition = 1; - for (auto &Mod : ThinLTO.ModuleMap) { if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap)) return E; - ++Task; - ++Partition; } return BackendProc->wait(); diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index 30f0deab90a0..4cfbbf8645e0 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -76,8 +76,12 @@ namespace llvm { compile-time arithmetic on PPC double-double numbers, it is not able to represent all possible values held by a PPC double-double number, for example: (long double) 1.0 + (long double) 0x1p-106 - Should this be replaced by a full emulation of PPC double-double? */ - static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0}; + Should this be replaced by a full emulation of PPC double-double? + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ + static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; /* There are temporary semantics for the real PPCDoubleDouble implementation. Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index dd19eee15f62..49d0ed55a716 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() { .Case("POWER7", "pwr7") .Case("POWER8", "pwr8") .Case("POWER8E", "pwr8") + .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") .Default(generic); } diff --git a/contrib/llvm/lib/Support/NativeFormatting.cpp b/contrib/llvm/lib/Support/NativeFormatting.cpp index bb8689141098..b951a88a38db 100644 --- a/contrib/llvm/lib/Support/NativeFormatting.cpp +++ b/contrib/llvm/lib/Support/NativeFormatting.cpp @@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style, N *= 100.0; char Buf[32]; - unsigned Len; - Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); - if (Style == FloatStyle::Percent) - ++Len; + format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); S << Buf; if (Style == FloatStyle::Percent) S << '%'; diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp index 99d2070cb6ed..9849b3aa1ce9 100644 --- a/contrib/llvm/lib/Support/YAMLTraits.cpp +++ b/contrib/llvm/lib/Support/YAMLTraits.cpp @@ -118,6 +118,18 @@ void Input::beginMapping() { } } +std::vector Input::keys() { + MapHNode *MN = dyn_cast(CurrentNode); + std::vector Ret; + if (!MN) { + setError(CurrentNode, "not a mapping"); + return Ret; + } + for (auto &P : MN->Mapping) + Ret.push_back(P.first()); + return Ret; +} + bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, void *&SaveInfo) { UseDefault = false; @@ -163,7 +175,7 @@ void Input::endMapping() { if (!MN) return; for (const 
auto &NN : MN->Mapping) { - if (!MN->isValidKey(NN.first())) { + if (!is_contained(MN->ValidKeys, NN.first())) { setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'"); break; } @@ -373,14 +385,6 @@ std::unique_ptr Input::createHNodes(Node *N) { } } -bool Input::MapHNode::isValidKey(StringRef Key) { - for (const char *K : ValidKeys) { - if (Key.equals(K)) - return true; - } - return false; -} - void Input::setError(const Twine &Message) { this->setError(CurrentNode, Message); } @@ -451,6 +455,10 @@ void Output::endMapping() { StateStack.pop_back(); } +std::vector Output::keys() { + report_fatal_error("invalid call"); +} + bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault, bool &UseDefault, void *&) { UseDefault = false; diff --git a/contrib/llvm/lib/TableGen/StringMatcher.cpp b/contrib/llvm/lib/TableGen/StringMatcher.cpp index 16681702d1d6..0c83da65e19e 100644 --- a/contrib/llvm/lib/TableGen/StringMatcher.cpp +++ b/contrib/llvm/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/StringMatcher.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/StringMatcher.h" +#include #include +#include +#include +#include + using namespace llvm; /// FindFirstNonCommonLetter - Find the first character in the keys of the @@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector &Matches, } // Bucket the matches by the character we are comparing. - std::map > MatchesByLetter; + std::map> MatchesByLetter; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); @@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector &Matches, // FIXME: Need to escape general strings. OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " - << NumChars << "))\n"; + << NumChars << ") != 0)\n"; OS << Indent << " break;\n"; } @@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector &Matches, OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; OS << Indent << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { // TODO: escape hard stuff (like \n) if we ever care about it. OS << Indent << "case '" << LI->first << "':\t // " @@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector &Matches, return true; } - /// Emit - Top level entry point. /// void StringMatcher::Emit(unsigned Indent) const { @@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const { if (Matches.empty()) return; // First level categorization: group strings by length. 
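For orientation, StringMatcher turns a list of (key, code) pairs into nested switches: first on the string's length, then character by character, finishing with the memcmp whose comparison this patch spells as "!= 0". Reconstructed, roughly, for the hypothetical pairs ("add", "return ADD;") and ("sub", "return SUB;"), with Str the StrVariableName; exact comments and layout vary:

switch (Str.size()) {
default: break;
case 3:  // 2 strings to match.
  switch (Str[0]) {
  default: break;
  case 'a':  // 1 string to match.
    if (memcmp(Str.data()+1, "dd", 2) != 0)
      break;
    return ADD;  // "add"
  case 's':  // 1 string to match.
    if (memcmp(Str.data()+1, "ub", 2) != 0)
      break;
    return SUB;  // "sub"
  }
  break;
}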
- std::map > MatchesByLength; + std::map> MatchesByLength; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); @@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const { OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; OS.indent(Indent*2+2) << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " << LI->second.size() diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index c40391d5ad9d..740766b151bb 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ FeatureCRC, FeatureCrypto, + FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing ]>; def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b2d96a32fd3a..efc221893782 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -76,7 +76,6 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - void EmitXRayTable(); void EmitSled(const MachineInstr &MI, SledKind Kind); /// \brief tblgen'erated driver function for lowering simple MI->MC @@ -95,7 +94,7 @@ public: AArch64FI = F.getInfo(); STI = static_cast(&F.getSubtarget()); bool Result = AsmPrinter::runOnMachineFunction(F); - EmitXRayTable(); + emitXRayTable(); return Result; } @@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) EmitSled(MI, SledKind::TAIL_CALL); } -void AArch64AsmPrinter::EmitXRayTable() -{ - //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid - // code duplication as it is now for x86_64, ARM32 and AArch64. - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section; - - if (STI->isTargetELF()) { - if (Fn->hasComdat()) - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - else - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } else if (STI->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { static const int8_t NoopsInSledCount = 7; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dcb05601e5f4..8a76c42b5898 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; + // Allow one more for offset. + if (Offset > 0) + Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a87204d46eae..0b0a0e7d083e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5cc5efb331e3..745c9923de2e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -313,6 +313,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. 
CONST_DATA_PTR, SENDMSG, + SENDMSGHALT, INTERP_MOV, INTERP_P1, INTERP_P2, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e7b40016e272..f079c8d0c70c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fa53831cbe16..c78e97dfd46f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_sendmsg: { + case AMDGPUIntrinsic::SI_sendmsg: + case Intrinsic::amdgcn_s_sendmsg: { Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); SDValue Glue = Chain.getValue(1); return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_s_sendmsghalt: { + Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + SDValue Glue = Chain.getValue(1); + return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain, + Op.getOperand(2), Glue); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 202a1e9ed8ac..fceabd7a8fdd 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. 
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) { BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); LastInstWritesM0 = false; return; @@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) || - I->getOpcode() == AMDGPU::S_SENDMSG) + I->getOpcode() == AMDGPU::S_SENDMSG || + I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; else Required = handleOperands(*I); diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td index 0aeb1297d3a7..73cd5774128e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", [(AMDGPUsendmsg (i32 imm:$simm16))] >; + +def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", + [(AMDGPUsendmsghalt (i32 imm:$simm16))] +>; } // End Uses = [EXEC, M0] -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">; def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index f20768ab77a5..8ec9cb02813c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); - // Emit the XRay table for this function. - EmitXRayTable(); - // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index ce0b04d56d9e..93fed10eb2d0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -113,9 +113,6 @@ public: void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - // Helper function that emits the XRay sleds we've collected for a particular - // function. 
- void EmitXRayTable(); private: void EmitSled(const MachineInstr &MI, SledKind Kind); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 293a527b09e8..07044b9697b6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -22,9 +22,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { EmitSled(MI, SledKind::TAIL_CALL); } - -void ARMAsmPrinter::EmitXRayTable() -{ - if (Sleds.empty()) - return; - - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP | - ELF::SHF_MERGE, - 0, CurrentFnSym->getName()); - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - OutStreamer->SwitchSection(Section); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 4); - OutStreamer->EmitSymbolValue(CurrentFnSym, 4); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(6); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index c0591c332dea..963fb99ce09b 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -53,28 +53,36 @@ // // The code below is intended to be fully target-independent. +#include "BitTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "BitTracker.h" +#include +#include +#include using namespace llvm; typedef BitTracker BT; namespace { + // Local trickery to pretty print a register (without the whole "%vreg" // business). 
struct printv { printv(unsigned r) : R(r) {} + unsigned R; }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); @@ -82,9 +90,11 @@ namespace { OS << 's'; return OS; } -} + +} // end anonymous namespace namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { switch (BV.Type) { case BT::BitValue::Top: @@ -167,14 +177,14 @@ namespace llvm { return OS; } -} + +} // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; } - BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} @@ -182,7 +192,6 @@ BitTracker::~BitTracker() { delete ⤅ } - // If we were allowed to update a cell for a part of a register, the meet // operation would need to be parametrized by the register number and the // exact part of the register, so that the computer BitRefs correspond to @@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { return Changed; } - // Insert the entire cell RC into the current cell at position given by M. BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { @@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, return *this; } - BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { uint16_t B = M.first(), E = M.last(), W = width(); assert(B < W && E < W); @@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { return RC; } - BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { // Rotate left (i.e. towards increasing bit indices). // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] @@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { return *this; } - BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, const BitValue &V) { assert(B <= E); @@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, return *this; } - BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { // Append the cell given as the argument to the "this" cell. // Bit 0 of RC becomes bit W of the result, where W is this->width(). @@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { return *this; } - uint16_t BT::RegisterCell::ct(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const { return C; } - uint16_t BT::RegisterCell::cl(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const { return C; } - bool BT::RegisterCell::operator== (const RegisterCell &RC) const { uint16_t W = Bits.size(); if (RC.Bits.size() != W) @@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const { return true; } - uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // The general problem is with finding a register class that corresponds // to a given reference reg:sub. 
There can be several such classes, and @@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { return BW; } - BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, const CellMapType &M) const { uint16_t BW = getRegBitWidth(RR); @@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, return RegisterCell::top(BW); } - void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const { // While updating the cell map can be done in a meaningful way for @@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, M[RR.Reg] = RC; } - // Check if the cell represents a compile-time integer value. bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { uint16_t W = A.width(); @@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { return true; } - // Convert a cell to the integer value. The result must fit in uint64_t. uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { assert(isInt(A)); @@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { return Val; } - // Evaluator helper functions. These implement some common operation on // register cells that can be used to implement target-specific instructions // in a target-specific evaluator. @@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); @@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, uint16_t Sh) const { assert(Sh <= A1.width()); @@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, return Res; } - BT::RegisterCell 
BT::MachineEvaluator::eAND(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { uint16_t W = A1.width(); RegisterCell Res(W); @@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.cl(B), AW = A1.width(); @@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.ct(B), AW = A1.width(); @@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const { uint16_t W = A1.width(); @@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, const RegisterCell &A2, uint16_t AtN) const { uint16_t W1 = A1.width(), W2 = A2.width(); @@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, return Res; } - BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); uint16_t W = getRegBitWidth(Reg); @@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } - // Main W-Z implementation. 
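eAND and eORL above can keep bits known even when an input bit is unknown, because 0 is absorbing for AND and 1 for OR. A minimal sketch of that rule over a Zero/One/Top domain, a simplification of BitTracker's BitValue (which additionally has Ref bits pointing at other registers):

```cpp
#include <cassert>

enum class Bit { Zero, One, Top }; // Top = unknown

// Three-valued AND: a known Zero forces the result regardless of the
// other operand, so Top does not always poison the output.
static Bit and3(Bit A, Bit B) {
  if (A == Bit::Zero || B == Bit::Zero)
    return Bit::Zero;
  if (A == Bit::One && B == Bit::One)
    return Bit::One;
  return Bit::Top;
}

int main() {
  assert(and3(Bit::Zero, Bit::Top) == Bit::Zero); // known despite Top input
  assert(and3(Bit::One, Bit::Top) == Bit::Top);   // genuinely unknown
}
```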
void BT::visitPHI(const MachineInstr &PI) { @@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { } } - void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; @@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) { } } - BT::RegisterCell BT::get(RegisterRef RR) const { return ME.getCell(RR, Map); } - void BT::put(RegisterRef RR, const RegisterCell &RC) { ME.putCell(RR, RC, Map); } - // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { @@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { } } - // Check if the block has been "executed" during propagation. (If not, the // block is dead, but it may still appear to be reachable.) bool BT::reached(const MachineBasicBlock *B) const { @@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const { return false; } - // Visit an individual instruction. This could be a newly added instruction, // or one that has been modified by an optimization. void BT::visit(const MachineInstr &MI) { @@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) { FlowQ.pop(); } - void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); } - void BT::run() { reset(); assert(FlowQ.empty()); @@ -1141,4 +1106,3 @@ void BT::run() { if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); } - diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h index 74cafcd00b60..48c5f2266acf 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -1,4 +1,4 @@ -//===--- BitTracker.h -----------------------------------------------------===// +//===--- BitTracker.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// -#ifndef BITTRACKER_H -#define BITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" - +#include "llvm/CodeGen/MachineOperand.h" +#include +#include #include #include #include +#include namespace llvm { - class ConstantInt; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineInstr; - class MachineOperand; - class raw_ostream; + +class ConstantInt; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineInstr; +class raw_ostream; struct BitTracker { struct BitRef; @@ -76,19 +79,19 @@ private: CellMapType ⤅ }; - // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} + bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); } + unsigned Reg; uint16_t Pos; }; - // Abstraction of a register reference in MachineOperand. It contains the // register number and the subregister index. struct BitTracker::RegisterRef { @@ -96,10 +99,10 @@ struct BitTracker::RegisterRef { : Reg(R), Sub(S) {} RegisterRef(const MachineOperand &MO) : Reg(MO.getReg()), Sub(MO.getSubReg()) {} + unsigned Reg, Sub; }; - // Value that a single bit can take. 
This is outside of the context of // any register, it is more of an abstraction of the two-element set of // possible bit values. One extension here is the "Ref" type, which @@ -158,6 +161,7 @@ struct BitTracker::BitValue { bool operator!= (const BitValue &V) const { return !operator==(V); } + bool is(unsigned T) const { assert(T == 0 || T == 1); return T == 0 ? Type == Zero @@ -209,6 +213,7 @@ struct BitTracker::BitValue { bool num() const { return Type == Zero || Type == One; } + operator bool() const { assert(Type == Zero || Type == One); return Type == One; @@ -217,7 +222,6 @@ struct BitTracker::BitValue { friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); }; - // This operation must be idempotent, i.e. ref(ref(V)) == ref(V). inline BitTracker::BitValue BitTracker::BitValue::ref(const BitValue &V) { @@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) { return self(); } - inline BitTracker::BitValue BitTracker::BitValue::self(const BitRef &Self) { return BitValue(Self.Reg, Self.Pos); } - // A sequence of bits starting from index B up to and including index E. // If E < B, the mask represents two sections: [0..E] and [B..W) where // W is the width of the register. struct BitTracker::BitMask { - BitMask() : B(0), E(0) {} + BitMask() = default; BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } uint16_t last() const { return E; } -private: - uint16_t B, E; -}; +private: + uint16_t B = 0; + uint16_t E = 0; +}; // Representation of a register: a list of BitValues. struct BitTracker::RegisterCell { @@ -255,6 +259,7 @@ struct BitTracker::RegisterCell { uint16_t width() const { return Bits.size(); } + const BitValue &operator[](uint16_t BitN) const { assert(BitN < Bits.size()); return Bits[BitN]; @@ -297,12 +302,10 @@ private: friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); }; - inline bool BitTracker::has(unsigned Reg) const { return Map.find(Reg) != Map.end(); } - inline const BitTracker::RegisterCell& BitTracker::lookup(unsigned Reg) const { CellMapType::const_iterator F = Map.find(Reg); @@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const { return F->second; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { RegisterCell RC(Width); @@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::top(uint16_t Width) { RegisterCell RC(Width); @@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::ref(const RegisterCell &C) { uint16_t W = C.width(); @@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { struct BitTracker::MachineEvaluator { MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) : TRI(T), MRI(M) {} - virtual ~MachineEvaluator() {} + virtual ~MachineEvaluator() = default; uint16_t getRegBitWidth(const RegisterRef &RR) const; RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not // the cells directly. This function is a convenience wrapper to quickly // generate a ref for a cell corresponding to a register reference. 
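The BitMask change above is a routine C++11 modernization: a hand-written zeroing constructor becomes in-class member initializers plus a defaulted constructor. Behavior is identical, and the defaults now sit next to the members they initialize. The idiom in isolation (illustrative type, not the real BitMask):

```cpp
#include <cassert>
#include <cstdint>

struct Mask {
  Mask() = default; // uses the member initializers below
  Mask(uint16_t b, uint16_t e) : B(b), E(e) {}

  uint16_t first() const { return B; }
  uint16_t last() const { return E; }

private:
  uint16_t B = 0;
  uint16_t E = 0;
};

int main() {
  Mask M; // default-constructed, identical to the old B(0), E(0) form
  assert(M.first() == 0 && M.last() == 0);
}
```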
@@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index b78c4126e0b1..436f88dcd450 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,16 +7,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "Hexagon.h" +#include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, } } - BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + using namespace Hexagon; + if (Sub == 0) return MachineEvaluator::mask(Reg, 0); - using namespace Hexagon; const TargetRegisterClass *RC = MRI.getRegClass(Reg); unsigned ID = RC->getID(); uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); @@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } namespace { + class RegisterRefs { std::vector Vector; @@ -117,17 +132,21 @@ public: } size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { // The main purpose of this operator is to assert with bad argument. assert(n < Vector.size()); return Vector[n]; } }; -} + +} // end anonymous namespace bool HexagonEvaluator::evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. @@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - using namespace Hexagon; unsigned Opc = MI.getOpcode(); if (MI.mayLoad()) { @@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case S2_cl0: case S2_cl0p: // Always produce a 32-bit result. 
- return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs); case S2_cl1: case S2_cl1p: - return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs); case S2_clb: case S2_clbp: { uint16_t W1 = getRegBitWidth(Reg[1]); @@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } case S2_ct0: case S2_ct0p: - return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs); case S2_ct1: case S2_ct1p: - return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs); case S5_popcountp: // TODO break; @@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + if (TII.isPredicated(MI)) return false; assert(MI.mayLoad() && "A load that mayn't?"); @@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, uint16_t BitNum; bool SignEx; - using namespace Hexagon; switch (Opc) { default: @@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, return true; } - unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); @@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; } - unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { typedef MachineRegisterInfo::livein_iterator iterator; for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h index 9e7b1dbe298f..2cbf65e66ca6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -1,4 +1,4 @@ -//===--- HexagonBitTracker.h ----------------------------------------------===// +//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONBITTRACKER_H -#define HEXAGONBITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H #include "BitTracker.h" #include "llvm/ADT/DenseMap.h" +#include namespace llvm { - class HexagonInstrInfo; - class HexagonRegisterInfo; + +class HexagonInstrInfo; +class HexagonRegisterInfo; struct HexagonEvaluator : public BitTracker::MachineEvaluator { typedef BitTracker::CellMapType CellMapType; @@ -49,10 +51,12 @@ private: // Type of formal parameter extension. struct ExtType { enum { SExt, ZExt }; - char Type; - uint16_t Width; - ExtType() : Type(0), Width(0) {} + + ExtType() = default; ExtType(char t, uint16_t w) : Type(t), Width(w) {} + + char Type = 0; + uint16_t Width = 0; }; // Map VR -> extension type. 
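Several call sites above (ct, eCLB, eCTB) switch literal 0/1 arguments to false/true where the parameter is bool. Both compile, but the bool literal states intent at the call site and stays correct if an int overload ever appears. A minimal illustration with an invented function:

```cpp
#include <iostream>

// The parameter selects which bit value to count, so it is a bool.
static void countBits(bool CountOnes) {
  std::cout << (CountOnes ? "counting ones\n" : "counting zeros\n");
}

int main() {
  countBits(0);     // legal via int-to-bool conversion, reads like a magic number
  countBits(false); // same call, self-documenting
}
```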
typedef DenseMap RegExtMap; @@ -61,4 +65,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 34ce3e652995..0a7dc6b49d00 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,26 +11,45 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonHazardRecognizer.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include #include +#include +#include +#include using namespace llvm; @@ -108,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), RI() {} - static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); } - static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) && isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi)); } - /// Calculate number of instructions excluding the debug instructions. static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { @@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } - /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions @@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return &*I; // We've reached a different loop, which means the loop0 has been removed. if (Opc == EndLoopOp) - return 0; + return nullptr; } // Check the predecessors for the LOOP instruction. MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); if (loop) return loop; } - return 0; + return nullptr; } - /// Gather register def/uses from MI. 
/// This treats possible (predicated) defs as actually happening ones /// (conservatively). @@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI, } } - // Position dependent, so check twice for swap. static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { return false; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } - /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } - /// This function can analyze one/two way branching only and should (mostly) be /// called by target independent side. /// First entry is always the opcode of the branching instruction, except when @@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
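findLoopInstr in the hunks above now returns nullptr rather than 0. Both yield a null MachineInstr*, but nullptr is the type-safe spelling: it can never be mistaken for an integer during overload resolution. A self-contained demonstration of the difference:

```cpp
#include <cassert>

static int pick(int) { return 1; }
static int pick(int *) { return 2; }

int main() {
  assert(pick(0) == 1);       // literal 0 is an int first, not a null pointer
  assert(pick(nullptr) == 2); // nullptr always selects the pointer overload
}
```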
- for (;;) { + while (true) { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; @@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } - unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, return nonDbgBBSize(&MBB) <= 3; } - bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) @@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; } - bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs, BranchProbability Probability) const { return NumInstrs <= 4; @@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Unimplemented"); } - void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { @@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot( } } - static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { const MachineBasicBlock &B = *MI.getParent(); Regs.addLiveOuts(B); @@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; } - // We indicate that we want to reverse the branch by // inserting the reversed branching opcode. bool HexagonInstrInfo::reverseBranchCondition( @@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition( return false; } - void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); } - bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const { return getAddrMode(MI) == HexagonII::PostInc; } - // Returns true if an instruction is predicated irrespective of the predicate // sense. For example, all of the following will return true. // if (p0) R1 = add(R2, R3) @@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || @@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction( return true; } - bool HexagonInstrInfo::SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const { // TODO: Fix this return false; } - bool HexagonInstrInfo::DefinesPredicate( MachineInstr &MI, std::vector &Pred) const { auto &HRI = getRegisterInfo(); @@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate( return false; } - bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { return MI.getDesc().isPredicable(); } @@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return false; } - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not @@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, return Length; } - ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { @@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); return static_cast(STI).createDFAPacketizer(II); } - // Inspired by this pair: // %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1] @@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( return false; } - /// If the instruction is an increment of a constant value, return the amount. bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, int &Value) const { @@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } - unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { return NewReg; } - bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const { return (getAddrMode(MI) == HexagonII::AbsoluteSet); } - bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } - bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { return false; } - // Return true if the instruction is a compund branch instruction. bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch()); } - bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { return (MI.isBranch() && isPredicated(MI)) || isConditionalTransfer(MI) || @@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { !isPredicatedNew(MI)); } - bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_paddf: @@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { return false; } - // FIXME - Function name and it's functionality don't match. // It should be renamed to hasPredNewOpcode() bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { @@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { return PNewOpcode >= 0; } - // Returns true if an instruction is a conditional store. 
// // Note: It doesn't include conditional new-value stores as they can't be @@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_tfrt: @@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { return false; } - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } - bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { return false; } - // Return true when ConsMI uses a register defined by ProdMI. bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { @@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, return false; } - // Returns true if the instruction is alread a .cur. bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { return false; } - // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { @@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { return false; } - /// Symmetrical. See if these two instructions are fit for duplex pair. bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const { @@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); } - bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { if (MI.mayLoad() || MI.mayStore() || MI.isCompare()) return true; @@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { return (Opcode == Hexagon::ENDLOOP0 || Opcode == Hexagon::ENDLOOP1); } - bool HexagonInstrInfo::isExpr(unsigned OpType) const { switch(OpType) { case MachineOperand::MO_MachineBasicBlock: @@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const { } } - bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { const MCInstrDesc &MID = MI.getDesc(); const uint64_t F = MID.TSFlags; @@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { return false; } - // This returns true in two cases: // - The OP code itself indicates that this is an extended instruction. // - One of MOs has been marked with HMOTF_ConstExtended flag. @@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::FPPos) & HexagonII::FPMask; } - // No V60 HVX VMEM with A_INDIRECT. 
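isConstExtended above ultimately reduces to a range test: an immediate needs a constant extender exactly when it cannot be encoded in the instruction's own field, i.e. when it falls outside [MinValue, MaxValue]. The shape of that test, with an invented field width for the example values:

```cpp
#include <cassert>
#include <cstdint>

static bool needsConstExtender(int64_t Imm, int64_t MinValue,
                               int64_t MaxValue) {
  return Imm < MinValue || Imm > MaxValue;
}

int main() {
  // An 8-bit signed field encodes [-128, 127] without help.
  assert(!needsConstExtender(100, -128, 127));
  assert(needsConstExtender(1000, -128, 127)); // requires the extender word
}
```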
bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const { @@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } - bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_callr : @@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_jumpr : @@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { return false; } - // Return true if a given MI can accommodate given offset. // Use abs estimate as oppose to the exact number. // TODO: This will need to be changed to use MC level @@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, } } - bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, const MachineInstr &ESMI) const { bool isLate = isLateResultInstr(LRMI); @@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, return false; } - bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { switch (MI.getOpcode()) { case TargetOpcode::EXTRACT_SUBREG: @@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { return true; } - bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. 
return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } - bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); return Opcode == Hexagon::J2_loop0i || @@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { Opcode == Hexagon::J2_loop1rext; } - bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return false; @@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const { return isNewValueJump(MI) || isNewValueStore(MI); } - bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const { return isNewValue(MI) && MI.isBranch(); } - bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } - bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const { @@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, == OperandNum; } - bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(isPredicated(Opcode)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; // Make sure that the instruction is predicated. 
@@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; } - bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(get(Opcode).isBranch() && @@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; } - bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const { return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || @@ -2496,13 +2438,11 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; } - bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::STriw_pred : @@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { if (!MI.isBranch()) return false; @@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { return false; } - // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); @@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } - // Schedule this ASAP. bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, return false; } - bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } - // Check if the Offset is a valid auto-inc imm by Load/Store Type. 
// bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { @@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { llvm_unreachable("Not an auto-inc opc!"); } - bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, bool Extend) const { // This function is to check whether the "Offset" is in the correct range of @@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, "Please define it in the above switch statement!"); } - bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { return isV60VectorInstruction(MI) && isAccumulator(MI); } - bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { const uint64_t F = get(MI.getOpcode()).TSFlags; const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); @@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { V == HexagonII::TypeCVI_VA_DV; } - bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) @@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } } - // Add latency to instruction. bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, return false; } - /// \brief Get the base register and byte offset of a load/store instr. bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) @@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, return BaseReg != 0; } - /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, return false; } - bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const { unsigned Opc = CallMI.getOpcode(); return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr; } - bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) if (I.isEHLabel()) @@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // Returns true if an instruction can be converted into a non-extended // equivalent instruction. bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { @@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo) >= 0; } - bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) const { MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); @@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) return false; } - // Returns true, if a LD insn can be promoted to a cur load. 
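isValidAutoIncImm above accepts only post-increment offsets that are whole multiples of the access size and that fit the instruction's small signed increment field. A sketch of that check with illustrative field widths rather than the exact per-type Hexagon tables:

```cpp
#include <cassert>

static bool validAutoInc(int Offset, int AccessBytes, int FieldBits) {
  if (Offset % AccessBytes != 0)
    return false; // must step by whole elements
  int Steps = Offset / AccessBytes;
  int Lo = -(1 << (FieldBits - 1));
  int Hi = (1 << (FieldBits - 1)) - 1;
  return Steps >= Lo && Steps <= Hi; // must fit the signed increment field
}

int main() {
  assert(validAutoInc(8, 4, 4));  // two 4-byte elements, fits 4 signed bits
  assert(!validAutoInc(6, 4, 4)); // not a multiple of the access size
}
```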
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { auto &HST = MI.getParent()->getParent()->getSubtarget(); @@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { HST.hasV60TOps(); } - // Returns true, if a ST insn can be promoted to a new-value store. bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; } - bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. @@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, return true; } - bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. @@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, return false; } - bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { @@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, return MI.getOpcode() != Hexagon::A4_tlbmatch; } - bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { return (Opcode == Hexagon::J2_jumpt) || (Opcode == Hexagon::J2_jumpf) || @@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { (Opcode == Hexagon::J2_jumpfnewpt); } - bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { if (Cond.empty() || !isPredicated(Cond[0].getImm())) return false; return !isPredicatedTrue(Cond[0].getImm()); } - short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { return Hexagon::getAbsoluteForm(MI.getOpcode()); } - unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } - // Returns the base register in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, @@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, return MI.getOperand(basePos).getReg(); } - /// Return the position of the base and offset operands for this instruction. bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { @@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, return true; } - // Inserts branching instructions in reverse order of their occurrence. // e.g. jump_t t1 (i1) // jump t2 (i2) @@ -3265,24 +3173,20 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( return Jumpers; } - short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { if (Opcode < 0) return -1; return Hexagon::getBaseWithLongOffset(Opcode); } - short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithLongOffset(MI.getOpcode()); } - short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithRegOffset(MI.getOpcode()); } - // Returns Operand Index for the constant extended instruction. 
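getAddrMode above, like dozens of the predicates in this file, is a one-line probe of the packed TSFlags descriptor: shift the field into place, then mask it out. A standalone model of the idiom with invented field positions (the real positions live in HexagonBaseInfo.h):

```cpp
#include <cassert>
#include <cstdint>

constexpr unsigned PredicatedPos = 3, PredicatedMask = 0x1;
constexpr unsigned AddrModePos = 4, AddrModeMask = 0x7;

static bool isPredicatedFlag(uint64_t Flags) {
  return (Flags >> PredicatedPos) & PredicatedMask;
}

static unsigned addrModeField(uint64_t Flags) {
  return (Flags >> AddrModePos) & AddrModeMask;
}

int main() {
  uint64_t F = (1u << PredicatedPos) | (5u << AddrModePos); // pack two fields
  assert(isPredicatedFlag(F));
  assert(addrModeField(F) == 5); // each probe recovers its own field
}
```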
unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( return HexagonII::HCG_None; } - // Returns -1 when there is no opcode found. unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, const MachineInstr &GB) const { @@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, return -1; } - int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { llvm_unreachable("Unexpected predicable instruction"); } - // Return the cur value instruction for a given store. int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } - - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { // promoted. Therefore, in case of dependence check failure (due to R5) during // next iteration, it should be converted back to its most basic form. - // Return the new value instruction for a given store. int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode()); @@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { return 0; } - // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, } } - // Return .new predicate version for an instruction. int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { @@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, return 0; } - int HexagonInstrInfo::getDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form @@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const { return NewOp; } - // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( @@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_None; } - short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } - // Return first non-debug instruction in the basic block. MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) const { @@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) return nullptr; } - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. 
However, an "empty" itinerary may @@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( return Latency; } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense( return true; } - unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int InvPredOpcode; InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) @@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { llvm_unreachable("Unexpected predicated instruction"); } - // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { return ~(-1U << bits); } - unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } - // Returns the min value that doesn't need to be extended. int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { return 0; } - // Returns opcode of the non-extended equivalent instruction. short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { // Check if the instruction has a register form that uses register in place @@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { return -1; } - bool HexagonInstrInfo::getPredReg(ArrayRef Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) @@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef Cond, return true; } - short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo); } - short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const { return Hexagon::getRegForm(MI.getOpcode()); } - // Return the number of bytes required to encode the instruction. // Hexagon instructions are fixed length, 4 bytes, unless they // use a constant extender, which requires another 4 bytes. @@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { return Size; } - uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::TypePos) & HexagonII::TypeMask; } - unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); const InstrItineraryData &II = *ST.getInstrItineraryData(); @@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } - unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; } - // Calculate size of the basic block without debug instructions. 
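getMaxValue/getMinValue above derive an immediate's encodable range from its bit width: ~(-1U << bits) for an unsigned field, and [-(2^(bits-1)), 2^(bits-1)-1] for a signed one. Spot-checking the arithmetic:

```cpp
#include <cassert>

int main() {
  unsigned bits = 6;
  assert(~(-1U << bits) == 63u);       // unsigned max for a 6-bit field
  assert((1 << (bits - 1)) - 1 == 31); // signed max
  assert(-(1 << (bits - 1)) == -32);   // signed min
}
```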
unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); } - unsigned HexagonInstrInfo::nonDbgBundleSize( MachineBasicBlock::const_iterator BundleHead) const { assert(BundleHead->isBundle() && "Not a bundle header"); @@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize( return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator())); } - /// immediateExtend - Changes the instruction in place to one using an immediate /// extender. void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { @@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); } - bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" @@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget( return true; } - void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ MachineFunction::iterator A = MF.begin(); @@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* --- The code above is used to generate complete set of Hexagon Insn --- */ } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { return true; } - // Reverse the branch prediction. unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { int PredRevOpcode = -1; @@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { return PredRevOpcode; } - // TODO: Add more rigorous validation. bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) const { return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } - short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { return Hexagon::xformRegToImmOffset(MI.getOpcode()); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 2d184d1484e9..2358d4b7e4c0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,14 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/Target/TargetInstrInfo.h" +#include +#include #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -29,9 +34,10 @@ struct EVT; class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { - virtual void anchor(); const HexagonRegisterInfo RI; + virtual void anchor(); + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -260,7 +266,7 @@ public: /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; /// Create machine specific model for scheduling. 
DFAPacketizer * @@ -378,7 +384,6 @@ public: bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef Cond) const; - short getAbsoluteForm(const MachineInstr &MI) const; unsigned getAddrMode(const MachineInstr &MI) const; unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, @@ -421,13 +426,11 @@ public: unsigned getUnits(const MachineInstr &MI) const; unsigned getValidSubTargets(const unsigned Opcode) const; - /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; - void immediateExtend(MachineInstr &MI) const; bool invertAndChangeJumpTarget(MachineInstr &MI, MachineBasicBlock* NewTarget) const; @@ -438,6 +441,6 @@ public: short xformRegToImmOffset(const MachineInstr &MI) const; }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 371b52108b9b..d83bcbc41553 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -15,33 +15,31 @@ namespace llvm { - namespace Hexagon { +namespace Hexagon { + const unsigned int StartPacket = 0x1; const unsigned int EndPacket = 0x2; - } +} // end namespace Hexagon /// Hexagon target-specific information for each MachineFunction. class HexagonMachineFunctionInfo : public MachineFunctionInfo { // SRetReturnReg - Some subtargets require that sret lowering includes // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
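// Background for the field below (illustrative only, not from this patch):
// "sret" lowering demotes an aggregate return into a hidden out-parameter,
// conceptually turning
//
//   struct S f(void);          // source-level signature
// into
//   void f(struct S *ret);     // callee writes the result through 'ret'
//
// and some ABIs additionally require the callee to hand that pointer back in
// a register. SRetReturnReg below caches the virtual register holding it.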
- unsigned SRetReturnReg; - unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) - unsigned StackAlignBasePhysReg; // (physical) + unsigned SRetReturnReg = 0; + unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg = 0; // (physical) int VarArgsFrameIndex; - bool HasClobberLR; - bool HasEHReturn; + bool HasClobberLR = false; + bool HasEHReturn = false; std::map PacketInfo; virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), - StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo(MachineFunction &MF) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -75,6 +73,7 @@ public: void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index e902f600e881..c9c4f95dbaaa 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -10,17 +10,27 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. // //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,13 +54,21 @@ static cl::opt TraceGVPlacement("trace-gv-placement", // (e.g. 
-debug and -debug-only=globallayout)
 #define TRACE_TO(s, X) s << X
 #ifdef NDEBUG
-#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
+#define TRACE(X) \
+  do { \
+    if (TraceGVPlacement) { \
+      TRACE_TO(errs(), X); \
+    } \
+  } while (false)
 #else
-#define TRACE(X) \
-  do { \
-    if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
-    else { DEBUG( TRACE_TO(dbgs(), X) ); } \
-  } while (0)
+#define TRACE(X) \
+  do { \
+    if (TraceGVPlacement) { \
+      TRACE_TO(errs(), X); \
+    } else { \
+      DEBUG(TRACE_TO(dbgs(), X)); \
+    } \
+  } while (false)
 #endif

 // Returns true if the section name is such that the symbol will be put
@@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
          Sec.find(".scommon.") != StringRef::npos;
 }

-
 static const char *getSectionSuffixForSize(unsigned Size) {
   switch (Size) {
   default:
@@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
   return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
 }

-
 /// Return true if this global value should be placed into small data/bss
 /// section.
 bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
   return true;
 }

-
 bool HexagonTargetObjectFile::isSmallDataEnabled() const {
   return SmallDataThreshold > 0;
 }

-
 unsigned HexagonTargetObjectFile::getSmallDataSize() const {
   return SmallDataThreshold;
 }

-
 /// Descends any type down to "elementary" components,
 /// discovering the smallest addressable one.
 /// If zero is returned, declaration will not be modified.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 5feaffe6efb9..9a09a17767a6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,5 +1,4 @@
-
-//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
+//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -11,18 +10,17 @@
 // This file looks at a packet and tries to form compound insns
 //
 //===----------------------------------------------------------------------===//
+
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include
+#include

 using namespace llvm;
 using namespace Hexagon;

@@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
 };

 // enum HexagonII::CompoundGroup
-namespace {
-unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
   unsigned DstReg, SrcReg, Src1Reg, Src2Reg;

   switch (MI.getOpcode()) {
@@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
   return HexagonII::HCG_None;
 }
-}

 /// getCompoundOp - Return the index from 0-7 into the above opcode lists.
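// Reader's sketch (assumptions flagged, not patch content): the 0-7 index
// spans the eight combinations of predicate form, predicate register (P0/P1),
// and taken/not-taken hint, which is why each opcode table above has exactly
// eight entries. One consistent packing is shown below; the authoritative
// order is the enum whose tp0_jump_t/tp1_jump_t members appear in
// getCompoundOp() just after this.

static unsigned compoundJumpIndex(bool TruePred, bool IsP1, bool Taken) {
  // Hypothetical helper: three independent bits yield indexes 0 through 7.
  return (unsigned(TruePred) << 2) | (unsigned(IsP1) << 1) | unsigned(Taken);
}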
-namespace {
-unsigned getCompoundOp(MCInst const &HMCI) {
+static unsigned getCompoundOp(MCInst const &HMCI) {
   const MCOperand &Predicate = HMCI.getOperand(0);
   unsigned PredReg = Predicate.getReg();
@@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
     return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
   }
 }
-}

-namespace {
-MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
-  MCInst *CompoundInsn = 0;
+static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
+                               MCInst const &R) {
+  MCInst *CompoundInsn = nullptr;
   unsigned compoundOpcode;
   MCOperand Rs, Rt;
   int64_t Value;
@@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   return CompoundInsn;
 }
-}

 /// Non-Symmetrical. See if these two instructions are a fit for a compound pair.
-namespace {
-bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
-                           MCInst const &MIb, bool IsExtendedB) {
+static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+                                  MCInst const &MIb, bool IsExtendedB) {
   unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
   unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
   // We have two candidates - check that this is the same register
@@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
   return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
           (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
 }
-}

-namespace {
-bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
+                            MCInst &MCI) {
   assert(HexagonMCInstrInfo::isBundle(MCI));
   bool JExtended = false;
   for (MCInst::iterator J =
@@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
       JExtended = true;
       continue;
     }
-    if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
-        HexagonII::TypeJ) {
+    if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
       // Try to pair with another insn (B)undled with jump.
       bool BExtended = false;
       for (MCInst::iterator B =
@@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
   }
   return false;
 }
-}

 /// tryCompound - Given a bundle, check for compound insns; when one
 /// is found, update the contents of the bundle with the compound insn.
@@ -420,6 +409,4 @@
   // a compound is found.
while (lookForCompound(MCII, Context, MCI)) ; - - return; } diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h index 517f17cc9c64..5ece11bd5ce4 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h @@ -1,4 +1,4 @@ -//===--- RDFCopy.h --------------------------------------------------------===// +//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// -#ifndef RDF_COPY_H -#define RDF_COPY_H +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H +#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #include "RDFGraph.h" #include #include namespace llvm { + class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; namespace rdf { + struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} - virtual ~CopyPropagation() {} + + virtual ~CopyPropagation() = default; bool run(); void trace(bool On) { Trace = On; } @@ -49,7 +52,9 @@ namespace rdf { void updateMap(NodeAddr IA); bool scanBlock(MachineBasicBlock *B); }; -} // namespace rdf -} // namespace llvm -#endif +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp index 33c3f03790f3..fa272ea1a76a 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp @@ -10,16 +10,31 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" - #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace rdf; @@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { return OS; } -namespace { - void printRefHeader(raw_ostream &OS, const NodeAddr RA, - const DataFlowGraph &G) { - OS << Print(RA.Id, G) << '<' - << Print(RA.Addr->getRegRef(G), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; - } +static void printRefHeader(raw_ostream &OS, const NodeAddr RA, + const DataFlowGraph &G) { + OS << Print(RA.Id, G) << '<' + << Print(RA.Addr->getRegRef(G), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; } template<> @@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { } namespace { + template struct PrintListV { PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; const NodeList &List; const DataFlowGraph &G; @@ -201,7 +216,8 @@ namespace { } return OS; } -} + +} // end anonymous namespace template<> raw_ostream &operator<< (raw_ostream &OS, const 
Print> &P) {
@@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
   // Print the target for calls and branches (for readability).
   if (MI.isCall() || MI.isBranch()) {
     MachineInstr::const_mop_iterator T =
-        find_if(MI.operands(),
-                [] (const MachineOperand &Op) -> bool {
-                  return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
-                });
+        llvm::find_if(MI.operands(),
+                      [] (const MachineOperand &Op) -> bool {
+                        return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+                      });
     if (T != MI.operands_end()) {
       OS << ' ';
       if (T->isMBB())
@@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }

-} // namespace rdf
-} // namespace llvm
+} // end namespace rdf
+} // end namespace llvm

 // Node allocation functions.
 //
@@ -390,7 +406,6 @@ void NodeAllocator::clear() {
   ActiveEnd = nullptr;
 }

-
 // Insert node NA after "this" in the circular chain.
 void NodeBase::append(NodeAddr<NodeBase*> NA) {
   NodeId Nx = Next;
@@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
   }
 }

-
 // Fundamental node manipulator functions.

 // Obtain the register reference from a reference node.
@@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
   return findBlock(EntryB, G);
 }

-
 // Target operand information.
 //
@@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
   return false;
 }

-
 RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
   RegisterId SuperReg = RR.Reg;
   while (true) {
@@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
   OS << " }";
 }

-
 //
 // The data flow graph construction.
 //
@@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
 DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
       const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
       const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
-    : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+    : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
 }

-
 // The implementation of the definition stack.
 // Each register reference has its own definition stack. In particular,
 // register references "Reg" and "Reg:subreg" will each have their
@@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
   return P;
 }

-
 // Register information.

 // Get the list of references aliased to RR. Lane masks are ignored.
@@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
   return NA;
 }

-
 // Allocation routines for specific node types/kinds.

 NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
   return false;
 }

-
 // Clear all information in the graph.
 void DataFlowGraph::reset() {
   Memory.clear();
@@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
   Func = NodeAddr<FuncNode*>();
 }

-
 // Return the next reference node in the instruction node IA that is related
 // to RA.
Conceptually, two reference nodes are related if they refer to the // same instance of a register access, but differ in flags or other minor diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h index 871062ff2b05..49d78a8b22b5 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h @@ -1,4 +1,4 @@ -//===--- RDFGraph.h -------------------------------------------------------===// +//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -221,20 +221,25 @@ // The statement s5 has two use nodes for t0: u7" and u9". The quotation // mark " indicates that the node is a shadow. // -#ifndef RDF_GRAPH_H -#define RDF_GRAPH_H + +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H +#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include +#include +#include #include #include #include #include +#include #include // RDF uses uint32_t to refer to registers. This is to ensure that the type @@ -243,6 +248,7 @@ static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); namespace llvm { + class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -252,6 +258,7 @@ namespace llvm { class TargetInstrInfo; namespace rdf { + typedef uint32_t NodeId; typedef uint32_t RegisterId; @@ -293,9 +300,11 @@ namespace rdf { static uint16_t set_type(uint16_t A, uint16_t T) { return (A & ~TypeMask) | T; } + static uint16_t set_kind(uint16_t A, uint16_t K) { return (A & ~KindMask) | K; } + static uint16_t set_flags(uint16_t A, uint16_t F) { return (A & ~FlagMask) | F; } @@ -326,9 +335,14 @@ namespace rdf { }; template struct NodeAddr { - NodeAddr() : Addr(nullptr), Id(0) {} + NodeAddr() : Addr(nullptr) {} NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} + // Type cast (casting constructor). The reason for having this class + // instead of std::pair. + template NodeAddr(const NodeAddr &NA) + : Addr(static_cast(NA.Addr)), Id(NA.Id) {} + bool operator== (const NodeAddr &NA) const { assert((Addr == NA.Addr) == (Id == NA.Id)); return Addr == NA.Addr; @@ -336,13 +350,9 @@ namespace rdf { bool operator!= (const NodeAddr &NA) const { return !operator==(NA); } - // Type cast (casting constructor). The reason for having this class - // instead of std::pair. - template NodeAddr(const NodeAddr &NA) - : Addr(static_cast(NA.Addr)), Id(NA.Id) {} T Addr; - NodeId Id; + NodeId Id = 0; }; struct NodeBase; @@ -366,17 +376,20 @@ namespace rdf { struct NodeAllocator { // Amount of storage for a single node. 
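// An aside on the allocator that begins here (a sketch under the stated
// power-of-two NodesPerBlock assumption, not patch content): a NodeId packs a
// block number in its high bits and an in-block index in its low BitsPerIndex
// bits, offset by 1 so that 0 can serve as a null id:
//
//   NodeId id = ((Block << BitsPerIndex) | Index) + 1;  // see makeId() below
//   Block     = (id - 1) >> BitsPerIndex;               // see ptr()
//   Offset    = ((id - 1) & IndexMask) * NodeMemSize;
//
// NodeMemSize below is the raw per-node storage that this scheme indexes.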
enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), - IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + IndexMask((1 << BitsPerIndex)-1) { assert(isPowerOf2_32(NPB)); } + NodeBase *ptr(NodeId N) const { uint32_t N1 = N-1; uint32_t BlockN = N1 >> BitsPerIndex; uint32_t Offset = (N1 & IndexMask) * NodeMemSize; return reinterpret_cast(Blocks[BlockN]+Offset); } + NodeId id(const NodeBase *P) const; NodeAddr New(); void clear(); @@ -384,6 +397,7 @@ namespace rdf { private: void startNewBlock(); bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { // Add 1 to the id, to avoid the id of 0, which is treated as "null". return ((Block << BitsPerIndex) | Index) + 1; @@ -392,7 +406,7 @@ namespace rdf { const uint32_t NodesPerBlock; const uint32_t BitsPerIndex; const uint32_t IndexMask; - char *ActiveEnd; + char *ActiveEnd = nullptr; std::vector Blocks; typedef BumpPtrAllocatorImpl AllocatorTy; AllocatorTy MemPool; @@ -405,6 +419,7 @@ namespace rdf { RegisterRef() : RegisterRef(0) {} explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {} + operator bool() const { return Reg != 0 && Mask.any(); } bool operator== (const RegisterRef &RR) const { return Reg == RR.Reg && Mask == RR.Mask; @@ -420,7 +435,8 @@ namespace rdf { struct TargetOperandInfo { TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} - virtual ~TargetOperandInfo() {} + virtual ~TargetOperandInfo() = default; + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; @@ -428,7 +444,6 @@ namespace rdf { const TargetInstrInfo &TII; }; - // Packed register reference. Only used for storage. struct PackedRegisterRef { RegisterId Reg; @@ -442,11 +457,13 @@ namespace rdf { template struct IndexedSet { IndexedSet() : Map() { Map.reserve(N); } + T get(uint32_t Idx) const { // Index Idx corresponds to Map[Idx-1]. assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); return Map[Idx-1]; } + uint32_t insert(T Val) { // Linear search. auto F = llvm::find(Map, Val); @@ -455,11 +472,13 @@ namespace rdf { Map.push_back(Val); return Map.size(); // Return actual_index + 1. } + uint32_t find(T Val) const { auto F = llvm::find(Map, Val); assert(F != Map.end()); return F - Map.begin(); } + private: std::vector Map; }; @@ -478,12 +497,14 @@ namespace rdf { assert(LM.any()); return LM.all() ? 
0 : find(LM); } + PackedRegisterRef pack(RegisterRef RR) { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } PackedRegisterRef pack(RegisterRef RR) const { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } + RegisterRef unpack(PackedRegisterRef PR) const { return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId)); } @@ -491,11 +512,8 @@ namespace rdf { struct RegisterAggr { RegisterAggr(const TargetRegisterInfo &tri) - : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), - TRI(tri) {} - RegisterAggr(const RegisterAggr &RG) - : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits), - CheckUnits(RG.CheckUnits), TRI(RG.TRI) {} + : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {} + RegisterAggr(const RegisterAggr &RG) = default; bool empty() const { return Masks.empty(); } bool hasAliasOf(RegisterRef RR) const; @@ -530,11 +548,11 @@ namespace rdf { const TargetRegisterInfo &TRI; }; - struct NodeBase { public: // Make sure this is a POD. NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } uint16_t getKind() const { return NodeAttrs::kind(Attrs); } uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } @@ -596,29 +614,36 @@ namespace rdf { struct RefNode : public NodeBase { RefNode() = default; + RegisterRef getRegRef(const DataFlowGraph &G) const; + MachineOperand &getOp() { assert(!(getFlags() & NodeAttrs::PhiRef)); return *Ref.Op; } + void setRegRef(RegisterRef RR, DataFlowGraph &G); void setRegRef(MachineOperand *Op, DataFlowGraph &G); + NodeId getReachingDef() const { return Ref.RD; } void setReachingDef(NodeId RD) { Ref.RD = RD; } + NodeId getSibling() const { return Ref.Sib; } void setSibling(NodeId Sib) { Ref.Sib = Sib; } + bool isUse() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Use; } + bool isDef() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Def; @@ -702,6 +727,7 @@ namespace rdf { MachineBasicBlock *getCode() const { return CodeNode::getCode(); } + void addPhi(NodeAddr PA, const DataFlowGraph &G); }; @@ -709,6 +735,7 @@ namespace rdf { MachineFunction *getCode() const { return CodeNode::getCode(); } + NodeAddr findBlock(const MachineBasicBlock *BB, const DataFlowGraph &G) const; NodeAddr getEntryBlock(const DataFlowGraph &G); @@ -723,6 +750,7 @@ namespace rdf { template T ptr(NodeId N) const { return static_cast(ptr(N)); } + NodeId id(const NodeBase *P) const; template NodeAddr addr(NodeId N) const { @@ -738,13 +766,17 @@ namespace rdf { struct DefStack { DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: typedef NodeAddr value_type; struct Iterator { typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { assert(Pos >= 1); return DS.Stack[Pos-1]; @@ -755,14 +787,17 @@ namespace rdf { } bool operator==(const Iterator &It) const { return Pos == It.Pos; } bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to // the top of the DefStack. 
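// Illustration of the position convention (an assumed example, not from the
// source): with Stack = { D1, <delimiter>, D2 }, top() holds Pos == 3 and
// dereferences Stack[2] (D2); stepping down skips delimiter slots. A minimal
// standalone sketch of the same "null entry marks a block boundary" idea:

#include <vector>

template <typename Def>
unsigned topPosition(const std::vector<Def *> &Stack) {
  unsigned Pos = Stack.size();
  while (Pos > 0 && Stack[Pos - 1] == nullptr)
    --Pos; // skip block delimiters, as Iterator::nextDown() does
  return Pos; // Pos - 1 indexes the top element; Pos == 0 means empty
}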
const DefStack &DS; unsigned Pos; friend struct DefStack; }; + public: typedef Iterator iterator; iterator top() const { return Iterator(*this, true); } @@ -773,14 +808,18 @@ namespace rdf { void pop(); void start_block(NodeId N); void clear_block(NodeId N); + private: friend struct Iterator; typedef std::vector StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { return (P.Addr == nullptr) && (N == 0 || P.Id == N); } + unsigned nextUp(unsigned P) const; unsigned nextDown(unsigned P) const; + StorageType Stack; }; @@ -819,6 +858,7 @@ namespace rdf { if (RemoveFromOwner) removeFromOwner(UA); } + void unlinkDef(NodeAddr DA, bool RemoveFromOwner) { unlinkDefDF(DA); if (RemoveFromOwner) @@ -831,23 +871,28 @@ namespace rdf { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == Kind; } + template static bool IsCode(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == Kind; } + static bool IsDef(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Def; } + static bool IsUse(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Use; } + static bool IsPhi(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == NodeAttrs::Phi; } + static bool IsPreservingDef(const NodeAddr DA) { uint16_t Flags = DA.Addr->getFlags(); return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); @@ -902,6 +947,7 @@ namespace rdf { void unlinkUseDF(NodeAddr UA); void unlinkDefDF(NodeAddr DA); + void removeFromOwner(NodeAddr RA) { NodeAddr IA = RA.Addr->getOwner(*this); IA.Addr->removeMember(RA, *this); @@ -967,7 +1013,6 @@ namespace rdf { return MM; } - // Optionally print the lane mask, if it is not ~0. 
struct PrintLaneMaskOpt {
   PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@@ -991,7 +1036,9 @@ namespace rdf {
     PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
       : Print<NodeAddr<T>>(x, g) {}
   };
-} // namespace rdf
-} // namespace llvm
-#endif // RDF_GRAPH_H
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 6f0fdddd7d55..92d3c001df94 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   return MipsDAGToDAGISel::runOnMachineFunction(MF);
 }

+void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<DominatorTreeWrapperPass>();
+  SelectionDAGISel::getAnalysisUsage(AU);
+}
+
 void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                                                MachineFunction &MF) {
   MachineInstrBuilder MIB(MF, &MI);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2a8e5877e848..f89a350cab04 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:

   bool runOnMachineFunction(MachineFunction &MF) override;

+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
   void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                              MachineFunction &MF);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index aa3ffde24b99..2b9195b095e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
 static bool isFunctionGlobalAddress(SDValue Callee);

 static bool
-resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+resideInSameSection(const Function *Caller, SDValue Callee,
+                    const TargetMachine &TM) {
   // If !G, Callee can be an external symbol.
   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-  if (!G) return false;
-
-  const GlobalValue *GV = G->getGlobal();
-
-  if (GV->isDeclaration()) return false;
-
-  switch(GV->getLinkage()) {
-  default: llvm_unreachable("unknow linkage type");
-  case GlobalValue::AvailableExternallyLinkage:
-  case GlobalValue::ExternalWeakLinkage:
+  if (!G)
     return false;

-  // Callee with weak linkage is allowed if it has hidden or protected
-  // visibility
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
-    if (GV->hasDefaultVisibility())
-      return false;
+  const GlobalValue *GV = G->getGlobal();
+  if (!GV->isStrongDefinitionForLinker())
+    return false;

-  case GlobalValue::ExternalLinkage:
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-    break;
+  // Any explicitly-specified sections and section prefixes must also match.
+  // Also, if we're using -ffunction-sections, then each function is always in
+  // a different section (the same is true for COMDAT functions).
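// In outline (a reader's sketch of the rule the new code below implements;
// the combined predicate is hypothetical, though every call in it appears in
// the patch): two symbols may only be assumed co-located when both are strong
// definitions, explicit sections and section prefixes agree, and nothing
// (-ffunction-sections, COMDATs, or possible interposition) can split or
// redirect them:
//
//   bool coLocated = GV->isStrongDefinitionForLinker() &&
//                    !TM.getFunctionSections() &&
//                    !GV->hasComdat() && !Caller->hasComdat() &&
//                    GV->getSection() == Caller->getSection() &&
//                    TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
//
// The actual checks follow: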
+  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
+      GV->getSection() != Caller->getSection())
+    return false;
+  if (const auto *F = dyn_cast<Function>(GV)) {
+    if (F->getSectionPrefix() != Caller->getSectionPrefix())
+      return false;
   }

-  // With '-fPIC', calling default visiblity function need insert 'nop' after
-  // function call, no matter that function resides in same module or not, so
-  // we treat it as in different module.
-  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+  // If the callee might be interposed, then we can't assume the ultimate call
+  // target will be in the same section. Even in cases where we can assume that
+  // interposition won't happen, in any case where the linker might insert a
+  // stub to allow for interposition, we must generate code as though
+  // interposition might occur. To understand why this matters, consider a
+  // situation where: a -> b -> c where the arrows indicate calls. b and c are
+  // in the same section, but a is in a different module (i.e. has a different
+  // TOC base pointer). If the linker allows for interposition between b and c,
+  // then it will generate a stub for the call edge between b and c which will
+  // save the TOC pointer into the designated stack slot allocated by b. If we
+  // return true here, and therefore allow a tail call between b and c, that
+  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
+  // pointer into the stack slot allocated by a (where the a -> b stub saved
+  // a's TOC base pointer). If we're not considering a tail call, but rather,
+  // whether a nop is needed after the call instruction in b, because the linker
+  // will insert a stub, it might complain about a missing nop if we omit it
+  // (although many don't complain in this case).
+  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
     return false;

   return true;
@@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
       !isa<ExternalSymbolSDNode>(Callee))
     return false;

-  // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
-  // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
-  // module.
+  // Check if Callee resides in the same section, because for now, PPC64 SVR4
+  // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+  // another section.
   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
     return false;

   // TCO allows altering callee ABI, so we don't have to check further.
@@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
   return CallOpc;
 }

-static
-bool isLocalCall(const SDValue &Callee)
-{
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    return G->getGlobal()->isStrongDefinitionForLinker();
-  return false;
-}
-
 SDValue PPCTargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
   // stack frame. If caller and callee belong to the same module (and have the
   // same TOC), the NOP will remain unchanged.
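// As a concrete picture (standard PPC64 ELF behavior, not text from this
// patch): a call that may cross TOC regions compiles to
//
//   bl callee
//   nop                  # placeholder the linker may rewrite
//
// and when the linker routes the call through a TOC-switching stub it
// rewrites the nop into the restore, e.g. under ELFv2 (TOC save slot at
// offset 24; ELFv1 uses 40):
//
//   bl <call stub>
//   ld 2, 24(1)          # reload our TOC pointer from the save slot
//
// This is why CALL becomes CALL_NOP below whenever the callee may end up in a
// different section.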
+  MachineFunction &MF = DAG.getMachineFunction();
   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
       !isPatchPoint) {
     if (CallOpc == PPCISD::BCTRL) {
@@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
       // The address needs to go after the chain input but before the flag (or
       // any other variadic arguments).
       Ops.insert(std::next(Ops.begin()), AddTOC);
-    } else if ((CallOpc == PPCISD::CALL) &&
-               (!isLocalCall(Callee) ||
-                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+    } else if (CallOpc == PPCISD::CALL &&
+               !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
       // Otherwise insert NOP for non-local calls.
       CallOpc = PPCISD::CALL_NOP;
+    }
   }

   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d42e1187ce64..e1825ca1eda1 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   EmitFunctionBody();

   // Emit the XRay table for this function.
-  EmitXRayTable();
+  emitXRayTable();

   // We didn't modify anything.
   return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 1deefe1231ca..cd690442bb9f 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
   MachineBasicBlock::iterator NI =
       doMergeWithPrevious ? nullptr : std::next(MBBI);
+  PI = skipDebugInstructionsBackward(PI, MBB.begin());
+  if (NI != nullptr)
+    NI = skipDebugInstructionsForward(NI, MBB.end());
+
   unsigned Opc = PI->getOpcode();
   int Offset = 0;

@@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
   I = MBB.erase(I);
+  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

   if (!reserveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
@@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,

     if (HasDwarfEHHandlers && !isDestroy &&
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
-      BuildCFI(MBB, I, DL,
+      BuildCFI(MBB, InsertPos, DL,
                MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

     if (Amount == 0)
@@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // If this is a callee-pop calling convention, emit a CFA adjust for
     // the amount the callee popped.
     if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
-      BuildCFI(MBB, I, DL,
+      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

     // Add Amount to SP to destroy a frame, or subtract to setup.
@@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // Merge with any previous or following adjustment instruction. Note: the
     // instructions merged with here do not have CFI, so their stack
     // adjustments do not feed into CfaAdjustment.
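// Why the skipDebugInstructions{Backward,Forward} calls introduced above
// matter (reader's note): merge and CFI placement decisions must be identical
// with and without -g, so DBG_VALUEs may never block the peephole. A minimal
// sketch of the forward-skip shape, assuming machine-IR iterators:
//
//   template <typename Iter>
//   Iter skipDebug(Iter It, Iter End) {
//     while (It != End && It->isDebugValue())
//       ++It; // debug values carry no machine state
//     return It;
//   }
//
// With InsertPos computed that way, the SP-update merging below behaves the
// same whether or not debug instructions sit between the adjustments: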
-    StackAdjustment += mergeSPUpdates(MBB, I, true);
-    StackAdjustment += mergeSPUpdates(MBB, I, false);
+    StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
+    StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

     if (StackAdjustment) {
       if (!(Fn->optForMinSize() &&
-          adjustStackWithPops(MBB, I, DL, StackAdjustment)))
-        BuildStackAdjustment(MBB, I, DL, StackAdjustment,
+          adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
+        BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                              /*InEpilogue=*/false);
     }
   }
@@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // TODO: When not using precise CFA, we also need to adjust for the
     // InternalAmt here.
     if (CfaAdjustment) {
-      BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
-                               nullptr, CfaAdjustment));
+      BuildCFI(MBB, InsertPos, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr,
+                                                       CfaAdjustment));
     }
   }

diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index b293dfa98f82..fd2189397279 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         const SmallBitVector &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
+  SmallVector<int, 4> WidenedMask;
+  if (!canWidenShuffleElements(Mask, WidenedMask))
+    return SDValue();
+
   // TODO: If minimizing size and one of the inputs is a zero vector and the
   // zero vector has only one use, we could use a VPERM2X128 to save the
   // instruction bytes needed to explicitly generate the zero vector.
@@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // [6] - ignore
   // [7] - zero high half of destination

-  int MaskLO = Mask[0];
-  if (MaskLO == SM_SentinelUndef)
-    MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+  int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
+  int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];

-  int MaskHI = Mask[2];
-  if (MaskHI == SM_SentinelUndef)
-    MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
-
-  unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+  unsigned PermMask = MaskLO | (MaskHI << 4);

   // If either input is a zero vector, replace it with an undef input.
   // Shuffle mask values < 4 are selecting elements of V1.
@@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // selecting the zero vector and setting the zero mask bit.
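// Reader's sketch of the immediate encoding (mirrors the code above and
// below; the helper name is hypothetical): each widened 128-bit selector is
// already 0..3 (0-1 = halves of V1, 2-3 = halves of V2), the low selector
// occupies imm[1:0], the high selector imm[5:4], and bits 3 and 7 zero the
// corresponding destination half:
//
//   unsigned makePerm2X128Imm(int LoSel, int HiSel, bool ZeroLo, bool ZeroHi) {
//     unsigned Imm = unsigned(LoSel) | (unsigned(HiSel) << 4);
//     if (ZeroLo) Imm = (Imm & 0xf0) | 0x08;
//     if (ZeroHi) Imm = (Imm & 0x0f) | 0x80;
//     return Imm;
//   }
//
// The two fix-ups below apply exactly this for zero-vector operands: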
   if (IsV1Zero) {
     V1 = DAG.getUNDEF(VT);
-    if (MaskLO < 4)
+    if (MaskLO < 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI < 4)
+    if (MaskHI < 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }
   if (IsV2Zero) {
     V2 = DAG.getUNDEF(VT);
-    if (MaskLO >= 4)
+    if (MaskLO >= 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI >= 4)
+    if (MaskHI >= 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }

@@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

-  SmallVector WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;

   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
@@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
   assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");

-  SmallVector WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;

   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
@@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();

-  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
-  // Insure elements came from the same Op.
-  int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if (WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-    if (WidenedMask[i] == SM_SentinelUndef)
+  // Check for patterns which can be matched with a single insert of a 256-bit
+  // subvector.
+  bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 0, 1, 2, 3});
+  if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 8, 9, 10, 11})) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
+    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+                              DAG.getIntPtrConstant(0, DL));
+    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+                              OnlyUsesV1 ? V1 : V2,
+                              DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+  }
+
+  assert(WidenedMask.size() == 4);
+
+  // See if this is an insertion of the lower 128-bits of V2 into V1.
+  bool IsInsert = true;
+  int V2Index = -1;
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
       continue;
-    SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
-    unsigned OpIndex = (i < Size/2) ? 0 : 1;
+
+    // Make sure all V1 subvectors are in place.
+    if (WidenedMask[i] < 4) {
+      if (WidenedMask[i] != i) {
+        IsInsert = false;
+        break;
+      }
+    } else {
+      // Make sure we only have a single V2 index and it's the lowest 128 bits.
+      if (V2Index >= 0 || WidenedMask[i] != 4) {
+        IsInsert = false;
+        break;
+      }
+      V2Index = i;
+    }
+  }
+  if (IsInsert && V2Index >= 0) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+                                 DAG.getIntPtrConstant(0, DL));
+    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+  }
+
+  // Try to lower to vshuf64x2/vshuf32x4.
+  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  unsigned PermMask = 0;
+  // Ensure elements came from the same Op.
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
+      continue;
+
+    SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+    unsigned OpIndex = i / 2;
     if (Ops[OpIndex].isUndef())
       Ops[OpIndex] = Op;
     else if (Ops[OpIndex] != Op)
       return SDValue();
-  }

-  // Form a 128-bit permutation.
-  // Convert the 64-bit shuffle mask selection values into 128-bit selection
-  // bits defined by a vshuf64x2 instruction's immediate control byte.
-  unsigned PermMask = 0, Imm = 0;
-  unsigned ControlBitsNum = WidenedMask.size() / 2;
-
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    // Use first element in place of undef mask.
-    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
-    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+    // Convert the 128-bit shuffle mask selection values into 128-bit selection
+    // bits defined by a vshuf64x2 instruction's immediate control byte.
+    PermMask |= (WidenedMask[i] % 4) << (i * 2);
   }

   return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

 static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
   int NumElements = Mask.size();
-  int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
+  int NumV1Elements = 0, NumV2Elements = 0;
   for (int M : Mask)
     if (M < 0)
-      ++NumSentinelElements;
+      continue;
     else if (M < NumElements)
       ++NumV1Elements;
     else
@@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                 Mask, PassThru, Subtarget, DAG);
     }
     case INTR_TYPE_3OP_IMM8_MASK:
-    case INTR_TYPE_3OP_MASK:
-    case INSERT_SUBVEC: {
+    case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
       if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
         Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
-      else if (IntrData->Type == INSERT_SUBVEC) {
-        // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
-        assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
-        unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
-        Imm *= Src2.getSimpleValueType().getVectorNumElements();
-        Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
-      }

       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
@@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
     return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
                                     Op.getOperand(2));
   }
+  case ISD::INSERT_SUBVECTOR: {
+    unsigned EltSize = EltVT.getSizeInBits();
+    if (EltSize != 32 && EltSize != 64)
+      return false;
+    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+    // Only change element size, not type.
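// Reader's note on the index arithmetic coming up: an INSERT_SUBVECTOR index
// counts elements of the participating types, so once the bitcast changes the
// element width the index must be rescaled by oldWidth/newWidth, i.e.
// Imm' = Imm * OpEltBits / EltBits. Illustrative values (not from the patch):
// inserting a v4i32 subvector at element 4 of v16i32 describes the same bits
// as inserting v2i64 at element 2 of v8i64 (4 * 32 / 64 == 2). The guard just
// below keeps integer and floating-point element types from being mixed while
// doing so: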
+    if (VT.isInteger() != OpEltVT.isInteger())
+      return false;
+    uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+    SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
+    DCI.AddToWorklist(Op0.getNode());
+    // Op1 needs to be bitcasted to a smaller vector with the same element type.
+    SDValue Op1 = Op.getOperand(1);
+    MVT Op1VT = MVT::getVectorVT(EltVT,
+                                 Op1.getSimpleValueType().getSizeInBits() / EltSize);
+    Op1 = DAG.getBitcast(Op1VT, Op1);
+    DCI.AddToWorklist(Op1.getNode());
+    DCI.CombineTo(OrigOp.getNode(),
+                  DAG.getNode(Opcode, DL, VT, Op0, Op1,
+                              DAG.getConstant(Imm, DL, MVT::i8)));
+    return true;
+  }
   }

   return false;
@@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }

+/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
+/// the codegen.
+/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
+                                          const X86Subtarget &Subtarget,
+                                          SDLoc &DL) {
+  assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
+  SDValue Src = N->getOperand(0);
+  unsigned Opcode = Src.getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  EVT VT = N->getValueType(0);
+  EVT SrcVT = Src.getValueType();
+
+  auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
+    // TODO: Add extra cases where we can truncate both inputs for the
+    // cost of one (or none).
+    // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+    if (Op0 == Op1)
+      return true;
+
+    SDValue BC0 = peekThroughOneUseBitcasts(Op0);
+    SDValue BC1 = peekThroughOneUseBitcasts(Op1);
+    return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
+           ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+  };
+
+  auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
+    SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+  };
+
+  // Don't combine if the operation has other uses.
+  if (!N->isOnlyUserOf(Src.getNode()))
+    return SDValue();
+
+  // Only support vector truncation for now.
+  // TODO: i64 scalar math would benefit as well.
+  if (!VT.isVector())
+    return SDValue();
+
+  // In most cases it's only worth pre-truncating if we're only facing the cost
+  // of one truncation.
+  // i.e. if one of the inputs will constant fold or the input is repeated.
+  switch (Opcode) {
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+
+  case ISD::MUL:
+    // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
+    // better to truncate if we have the chance.
+    if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
+        !TLI.isOperationLegal(Opcode, SrcVT))
+      return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
+    LLVM_FALLTHROUGH;
+  case ISD::ADD: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegal(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+  }
+
+  return SDValue();
+}
+
 /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
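// How that works in scalar terms (reader's sketch, not patch code): PACKUS
// packs wider lanes into narrower ones with unsigned saturation, so a
// truncating pack is bit-exact only if each source lane is pre-masked to the
// destination width, which makes the saturation a no-op:

#include <cstdint>

static uint8_t packusLane(uint32_t x) {        // one lane of a v4i32 -> v16i8 path
  uint32_t masked = x & 0xFFu;                 // pre-mask to the low 8 bits
  return masked > 255 ? 255 : uint8_t(masked); // saturation can no longer fire
}

// The function below arranges the vector-level equivalent: mask the inputs,
// then apply X86ISD::PACKUS repeatedly.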
static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   SDValue Src = N->getOperand(0);
   SDLoc DL(N);
 
+  // Attempt to pre-truncate inputs to arithmetic ops instead.
+  if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
+    return V;
+
   // Try to detect AVG pattern first.
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index da7437ea0ccb..908053e1342d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
-
-  // Intrinsic call with masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrk")
-                To.RC:$src0,
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call with zero-masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrkz")
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call without masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rr")
-                From.RC:$src1, imm:$idx)>;
 }
 
 // Codegen pattern for the alternative types
@@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISSZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
-                                        load, "ucomiss">, PS, EVEX, VEX_LIG,
+    defm Int_VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
+                                            sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
                                         EVEX_CD8<32, CD8VT1>;
-    defm Int_VUCOMISDZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
-                                        load, "ucomisd">, PD, EVEX,
+    defm Int_VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
+                                            sse_load_f64, "ucomisd">, PD, EVEX,
                                         VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
-    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
-                                       load, "comiss">, PS, EVEX, VEX_LIG,
+    defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
+                                           sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
                                        EVEX_CD8<32, CD8VT1>;
-    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
-                                       load, "comisd">, PD, EVEX,
+    defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
+                                           sse_load_f64, "comisd">, PD, EVEX,
                                        VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
 }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 9d6a89363044..4cd6ae563f03 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                   Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 
+// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
+multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
+                             ValueType vt, Operand memop,
+                             ComplexPattern mem_cpat, string OpcodeStr> {
+  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
+          Sched<[WriteFAdd]>;
+  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+             [(set EFLAGS, (OpNode (vt RC:$src1), mem_cpat:$src2))]>,
+          Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
 let Defs = [EFLAGS] in {
   defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">, PS, VEX, VEX_LIG;
@@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                                      load, "ucomiss">, PS, VEX;
-    defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                                      load, "ucomisd">, PD, VEX;
+    defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                                          sse_load_f32, "ucomiss">, PS, VEX;
+    defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                                          sse_load_f64, "ucomisd">, PD, VEX;
 
-    defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
-                                     load, "comiss">, PS, VEX;
-    defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
-                                     load, "comisd">, PD, VEX;
+    defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                                         sse_load_f32, "comiss">, PS, VEX;
+    defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                                         sse_load_f64, "comisd">, PD, VEX;
   }
   defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, PS;
@@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                                     load, "ucomiss">, PS;
-    defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                                     load, "ucomisd">, PD;
+    defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                                         sse_load_f32, "ucomiss">, PS;
+    defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                                         sse_load_f64, "ucomisd">, PD;
 
-    defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
-                                    "comiss">, PS;
-    defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
-                                    "comisd">, PD;
+    defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                                        sse_load_f32, "comiss">, PS;
+    defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                                        sse_load_f64, "comisd">, PD;
   }
 } // Defs = [EFLAGS]
diff --git a/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
index 5d2af829028a..415a891bfd97 100755
--- a/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
@@ -1,4 +1,4 @@
-//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
+//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
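// [Editorial aside, not part of this patch] The table below pairs each EVEX
// opcode with its VEX-128 replacement. A minimal sketch of how such a table
// can be consumed; the entry fields and the linear scan are assumptions
// modeled on X86EvexToVexCompressTableEntry, not the real pass.
#include <cstdint>

struct CompressEntry {
  uint16_t EvexOpcode; // hypothetical field names for the sketch
  uint16_t VexOpcode;
};

static const CompressEntry CompressTable[] = {
    {0x1001, 0x0001}, {0x1002, 0x0002}, // placeholder opcode pairs
};

// Returns the VEX-128 replacement for an EVEX opcode, or -1 if there is none.
inline int lookupVexOpcode(uint16_t EvexOp) {
  for (const auto &E : CompressTable)
    if (E.EvexOpcode == EvexOp)
      return E.VexOpcode;
  return -1;
}

// X86 EVEX encoded instructions that have a VEX 128 encoding
// (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry
-  X86EvexToVex128CompressTable[] = {
+static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
   // EVEX scalar with corresponding VEX.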
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm }, { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr }, @@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry { X86::VUCOMISDZrr , X86::VUCOMISDrr }, { X86::VUCOMISSZrm , X86::VUCOMISSrm }, { X86::VUCOMISSZrr , X86::VUCOMISSrr }, - + { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr }, { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm }, { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr }, { X86::VMOVLHPSZrr , X86::VMOVLHPSrr }, - { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, + { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr }, { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr }, { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr }, { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr }, { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm }, { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr }, - + { X86::VPEXTRBZmr , X86::VPEXTRBmr }, { X86::VPEXTRBZrr , X86::VPEXTRBrr }, { X86::VPEXTRDZmr , X86::VPEXTRDmr }, @@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPEXTRQZrr , X86::VPEXTRQrr }, { X86::VPEXTRWZmr , X86::VPEXTRWmr }, { X86::VPEXTRWZrr , X86::VPEXTRWri }, - + { X86::VPINSRBZrm , X86::VPINSRBrm }, { X86::VPINSRBZrr , X86::VPINSRBrr }, { X86::VPINSRDZrm , X86::VPINSRDrm }, @@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ128rm , X86::VANDPDrm }, { X86::VANDPDZ128rr , X86::VANDPDrr }, { X86::VANDPSZ128rm , X86::VANDPSrm }, - { X86::VANDPSZ128rr , X86::VANDPSrr }, + { X86::VANDPSZ128rr , X86::VANDPSrr }, { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr }, { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, @@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ128rm , X86::VMOVAPDrm }, { X86::VMOVAPDZ128rr , X86::VMOVAPDrr }, { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV }, - { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, - { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, + { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, + { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, { X86::VMOVAPSZ128rr , X86::VMOVAPSrr }, { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV }, { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm }, @@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVUPDZ128rm , X86::VMOVUPDrm }, { X86::VMOVUPDZ128rr , X86::VMOVUPDrr }, { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV }, - { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, - { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, + { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, + { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, { X86::VMOVUPSZ128rr , X86::VMOVUPSrr }, { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV }, { X86::VMULPDZ128rm , X86::VMULPDrm }, @@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr }, { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm }, { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr }, - { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, - { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, - { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, + { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, + { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, + { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr }, { X86::VPERMILPDZ128mi , X86::VPERMILPDmi }, { X86::VPERMILPDZ128ri , X86::VPERMILPDri }, @@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm }, { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr }, { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm }, - { 
X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, + { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, { X86::VPMULDQZ128rm , X86::VPMULDQrm }, { X86::VPMULDQZ128rr , X86::VPMULDQrr }, { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm }, @@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri }, { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi }, { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri }, - { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, + { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, { X86::VPSLLDZ128ri , X86::VPSLLDri }, { X86::VPSLLDZ128rm , X86::VPSLLDrm }, - { X86::VPSLLDZ128rr , X86::VPSLLDrr }, + { X86::VPSLLDZ128rr , X86::VPSLLDrr }, { X86::VPSLLQZ128ri , X86::VPSLLQri }, { X86::VPSLLQZ128rm , X86::VPSLLQrm }, { X86::VPSLLQZ128rr , X86::VPSLLQrr }, @@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry // X86 EVEX encoded instructions that have a VEX 256 encoding // (table format: ). - static const X86EvexToVexCompressTableEntry - X86EvexToVex256CompressTable[] = { + static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = { { X86::VADDPDZ256rm , X86::VADDPDYrm }, { X86::VADDPDZ256rr , X86::VADDPDYrr }, { X86::VADDPSZ256rm , X86::VADDPSYrm }, @@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ256rr , X86::VANDPDYrr }, { X86::VANDPSZ256rm , X86::VANDPSYrm }, { X86::VANDPSZ256rr , X86::VANDPSYrr }, - { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, - { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, - { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, + { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, - { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, + { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, @@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry { X86::VDIVPDZ256rr , X86::VDIVPDYrr }, { X86::VDIVPSZ256rm , X86::VDIVPSYrm }, { X86::VDIVPSZ256rr , X86::VDIVPSYrr }, + { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr }, + { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr }, { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm }, { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr }, { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm }, @@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr }, { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm }, { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr }, + { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr }, + { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr }, { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm }, { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr }, { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm }, @@ -849,8 +863,8 @@ 
static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm }, { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr }, { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV }, - { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, - { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, + { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, + { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr }, { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV }, { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm }, @@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry { X86::VPAVGBZ256rr , X86::VPAVGBYrr }, { X86::VPAVGWZ256rm , X86::VPAVGWYrm }, { X86::VPAVGWZ256rr , X86::VPAVGWYrr }, - { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, - { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, - { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, - { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, - { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, - { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, - { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, - { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, + { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, + { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, + { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, + { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, + { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, + { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, + { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, + { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, { X86::VPERMDZ256rm , X86::VPERMDYrm }, { X86::VPERMDZ256rr , X86::VPERMDYrr }, { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi }, @@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLDQZ256rr , X86::VPSLLDQYri }, { X86::VPSLLDZ256ri , X86::VPSLLDYri }, { X86::VPSLLDZ256rm , X86::VPSLLDYrm }, - { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, + { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, { X86::VPSLLQZ256ri , X86::VPSLLQYri }, { X86::VPSLLQZ256rm , X86::VPSLLQYrm }, { X86::VPSLLQZ256rr , X86::VPSLLQYrr }, @@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr }, { X86::VPSLLWZ256ri , X86::VPSLLWYri }, { X86::VPSLLWZ256rm , X86::VPSLLWYrm }, - { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, + { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, { X86::VPSRADZ256ri , X86::VPSRADYri }, { X86::VPSRADZ256rm , X86::VPSRADYrm }, { X86::VPSRADZ256rr , X86::VPSRADYrr }, @@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSRLDQZ256rr , X86::VPSRLDQYri }, { X86::VPSRLDZ256ri , X86::VPSRLDYri }, { X86::VPSRLDZ256rm , X86::VPSRLDYrm }, - { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, + { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, { X86::VPSRLQZ256ri , X86::VPSRLQYri }, { X86::VPSRLQZ256rm , X86::VPSRLQYrm }, { X86::VPSRLQZ256rr , X86::VPSRLQYrr }, @@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry { X86::VXORPSZ256rr , X86::VXORPSYrr }, }; -#endif \ No newline at end of file +#endif diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h index df47b4ad583d..63a02af02faa 100644 --- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, INSERT_SUBVEC, + EXPAND_FROM_MEM, 
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VGETMANTS, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM, X86ISD::VGETMANTS, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index 2f69df064e7f..a38a4b30b77d 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo OutStreamer->EmitInstruction(TC, getSubtargetInfo()); } -void X86AsmPrinter::EmitXRayTable() { - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
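// [Editorial aside, not part of this patch] Each sled record written by the
// code removed below is 32 bytes: an 8-byte sled address, an 8-byte function
// address, one byte of sled kind, one byte for the always-instrument flag,
// then 14 zero bytes of padding. A sketch of that layout, with an invented
// struct name; the field order is inferred from the emission calls:
#include <cstdint>

struct XRaySledRecord {
  uint64_t SledAddress;     // EmitSymbolValue(Sled.Sled, 8)
  uint64_t FunctionAddress; // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;             // one byte from Sled.Kind
  uint8_t AlwaysInstrument; // one byte from Sled.AlwaysInstrument
  uint8_t Padding[14];      // EmitZeros(14) pads the record to 32 bytes
};
static_assert(sizeof(XRaySledRecord) == 32, "32-byte packed record");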
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - // Returns instruction preceding MBBI in MachineFunction. // If MBBI is the first instruction of the first basic block, returns null. static MachineBasicBlock::const_iterator diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2b0e672d56f2..d7792e296a58 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - - if (Kind == TTI::SK_Reverse) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb - }; - - if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512BWShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 - }; - - if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd - }; - - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb - }; - - if (ST->hasAVX2()) - if (const auto *Entry = - CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb - // + 
vinsertf128 - }; - - if (ST->hasAVX()) - if (const auto *Entry = - CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSSE3ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb - }; - - if (ST->hasSSSE3()) - if (const auto *Entry = - CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + packus - }; - - if (ST->hasSSE2()) - if (const auto *Entry = - CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps - }; - - if (ST->hasSSE1()) - if (const auto *Entry = - CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - } else if (Kind == TTI::SK_Alternate) { + if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - // The backend knows how to generate a single VEX.256 version of - // instruction VPBLENDW if the target supports AVX2. - if (ST->hasAVX2() && LT.second == MVT::v16i16) - return LT.first; + static const CostTblEntry AVX512VBMIShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb + { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb + }; - static const CostTblEntry AVXAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd + if (ST->hasVBMI()) + if (const auto *Entry = + CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps + static const CostTblEntry AVX512BWShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 + // + 2*pshufb + vinserti64x4 + }; - // This shuffle is custom lowered into a sequence of: - // 2x vextractf128 , 2x vpblendw , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5}, + if (ST->hasBWI()) + if (const auto *Entry = + CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - // This shuffle is custom lowered into a long sequence of: - // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} + static const CostTblEntry AVX512ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd + }; + + if (ST->hasAVX512()) + if (const auto *Entry = + CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps + { 
TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb + }; + + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + + { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor }; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSE41AltShuffleTbl[] = { - // These are lowered into movsd. - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - - // packed float vectors with four elements are lowered into BLENDI dag - // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'. - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - - // This shuffle generates a single pshufw. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - - // There is no instruction that matches a v16i8 alternate shuffle. - // The backend will expand it into the sequence 'pshufb + pshufb + or'. - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} + static const CostTblEntry SSE41ShuffleTbl[] = { + { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps + { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) - if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) + if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSSE3AltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSSE3ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - // SSE3 doesn't have 'blendps'. 
The following shuffles are expanded into - // the sequence 'shufps + pshufd' - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por }; if (ST->hasSSSE3()) - if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSEAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSE2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd + { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + packus - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd - - // This is expanded into a long sequence of four extract + four insert. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. - - // 8 x (pinsrw + pextrw + and + movb + movzb + or) - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} + { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por }; - // Fall-back (SSE3 and SSE2). - if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + }; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; } else if (Kind == TTI::SK_PermuteTwoSrc) { // We assume that source and destination have the same vector type. 
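// [Editorial aside, not part of this patch] Throughout the rewritten
// getShuffleCost above, the returned estimate is LT.first * Entry->Cost:
// the number of legal-type pieces the vector legalizes into, times the
// per-piece cost from the first table whose ISA predicate matches. A toy
// check of that arithmetic with invented numbers:
#include <cassert>

// E.g. a hypothetical v32i16 reverse on AVX2 legalizes as two v16i16 halves
// (LT.first == 2), each costing 2 per the AVX2ShuffleTbl entry
// (vperm2i128 + pshufb), for an estimate of 4.
inline int shuffleCostEstimate(int NumLegalPieces, int CostPerPiece) {
  return NumLegalPieces * CostPerPiece; // mirrors LT.first * Entry->Cost
}

int main() {
  assert(shuffleCostEstimate(2, 2) == 4);
  return 0;
}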
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bbc70ab21c6..55151c13b430 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // add(zext(xor i16 X, -32768), -32768) --> sext X
       return CastInst::Create(Instruction::SExt, X, LHS->getType());
     }
+
+    if (Val->isNegative() &&
+        match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
+        Val->sge(-C->sext(Val->getBitWidth()))) {
+      // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
+      return CastInst::Create(
+          Instruction::ZExt,
+          Builder->CreateNUWAdd(
+              X, Constant::getIntegerValue(X->getType(),
+                                           *C + Val->trunc(C->getBitWidth()))),
+          I.getType());
+    }
   }
 
   // FIXME: Use the match above instead of dyn_cast to allow these transforms
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92369bd70b13..f863d192fc2f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return replaceInstUsesWith(*II, V);
     break;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd: {
+    Value *Src0 = II->getArgOperand(0);
+    Value *Src1 = II->getArgOperand(1);
+
+    // Canonicalize constants into the RHS.
+    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+      II->setArgOperand(0, Src1);
+      II->setArgOperand(1, Src0);
+      std::swap(Src0, Src1);
+    }
+
+    Value *LHS = nullptr;
+    Value *RHS = nullptr;
+
+    // fma fneg(x), fneg(y), z -> fma x, y, z
+    if (match(Src0, m_FNeg(m_Value(LHS))) &&
+        match(Src1, m_FNeg(m_Value(RHS)))) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              {LHS, RHS, II->getArgOperand(2)});
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma fabs(x), fabs(x), z -> fma x, x, z
+    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
+        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              {LHS, LHS, II->getArgOperand(2)});
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma x, 1, z -> fadd x, z
+    if (match(Src1, m_FPOne())) {
+      Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
+      RI->copyFastMathFlags(II);
+      return RI;
+    }
+
+    break;
+  }
+  case Intrinsic::fabs: {
+    Value *Cond;
+    Constant *LHS, *RHS;
+    if (match(II->getArgOperand(0),
+              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
+      CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
+      CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
+      return SelectInst::Create(Cond, Call0, Call1);
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
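// [Editorial aside, not part of this patch] The three fma folds above rest on
// exact floating-point identities: the two sign flips cancel in the product,
// |x| * |x| equals x * x, and multiplying by one leaves a single rounded
// addition. A standalone check using the C library's fma:
#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.125;
  // fma(-x, -y, z) == fma(x, y, z)
  assert(std::fma(-x, -y, z) == std::fma(x, y, z));
  // fma(|x|, |x|, z) == fma(x, x, z)
  assert(std::fma(std::fabs(x), std::fabs(x), z) == std::fma(x, x, z));
  // fma(x, 1.0, z) == x + z: both sides round x + z exactly once.
  assert(std::fma(x, 1.0, z) == x + z);
  return 0;
}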
@@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // assume( (load addr) != null ) -> add 'nonnull' metadata to load // (if assume is valid at the load) - if (ICmpInst* ICmp = dyn_cast(IIOperand)) { - Value *LHS = ICmp->getOperand(0); - Value *RHS = ICmp->getOperand(1); - if (ICmpInst::ICMP_NE == ICmp->getPredicate() && - isa(LHS) && - isa(RHS) && - RHS->getType()->isPointerTy() && - cast(RHS)->isNullValue()) { - LoadInst* LI = cast(LHS); - if (isValidAssumeForContext(II, LI, &DT)) { - MDNode *MD = MDNode::get(II->getContext(), None); - LI->setMetadata(LLVMContext::MD_nonnull, MD); - return eraseInstFromFunction(*II); - } - } + CmpInst::Predicate Pred; + Instruction *LHS; + if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && + Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && + LHS->getType()->isPointerTy() && + isValidAssumeForContext(II, LHS, &DT)) { + MDNode *MD = MDNode::get(II->getContext(), None); + LHS->setMetadata(LLVMContext::MD_nonnull, MD); + return eraseInstFromFunction(*II); + // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? } + // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. APInt KnownZero(1, 0), KnownOne(1, 0); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5276bee4e0a2..388c5e4e7fa4 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // separated by a few arithmetic operations. BasicBlock::iterator BBI(LI); bool IsLoadCSE = false; - if (Value *AvailableVal = - FindAvailableLoadedValue(&LI, LI.getParent(), BBI, - DefMaxInstsToScan, AA, &IsLoadCSE)) { - if (IsLoadCSE) { - LoadInst *NLI = cast(AvailableVal); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(NLI, &LI, KnownIDs); - }; + if (Value *AvailableVal = FindAvailableLoadedValue( + &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { + if (IsLoadCSE) + combineMetadataForCSE(cast(AvailableVal), &LI); return replaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index bc38c4aca348..5ad2a1c0e3e6 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (ConstantInt *Op1C = dyn_cast(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); + // Turn: + // %zext = zext i32 %V to i64 + // %res = shl i64 %V, 8 + // + // Into: + // %shl = shl i32 %V, 8 + // %res = zext i32 %shl to i64 + // + // This is only valid if %V would have zeros shifted out. 
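// [Editorial aside, not part of this patch] A concrete instance of the guard:
// with %V = 0x00FFFFFF (top 8 bits clear) and a shift amount of 8,
//   shl-after-zext:  zext to i64 gives 0x0000000000FFFFFF; shl 8 gives
//                    0x00000000FFFFFF00;
//   shl-before-zext: shl i32 gives 0xFFFFFF00; zext gives
//                    0x00000000FFFFFF00 -- identical.
// With %V = 0xFF000000 the narrow shl would discard the 0xFF byte that the
// wide shl keeps, which is exactly what MaskedValueIsZero rules out below.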
+ if (auto *ZI = dyn_cast(I.getOperand(0))) { + unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits(); + if (ShAmt < SrcBitWidth && + MaskedValueIsZero(ZI->getOperand(0), + APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) { + auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt); + return new ZExtInst(Shl, I.getType()); + } + } + // If the shifted-out value is known-zero, then this is a NUW shift. if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 9bf638dcbae3..16e08ee58fbe 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -481,9 +481,9 @@ private: bool processNode(DomTreeNode *Node); Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { - if (LoadInst *LI = dyn_cast(Inst)) + if (auto *LI = dyn_cast(Inst)) return LI; - else if (StoreInst *SI = dyn_cast(Inst)) + if (auto *SI = dyn_cast(Inst)) return SI->getValueOperand(); assert(isa(Inst) && "Instruction not supported"); return TTI.getOrCreateResultFromMemIntrinsic(cast(Inst), diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index dee61b77412e..8b8236390bf4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); +STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - // If this store's memorydef stores the same value as the last store, the - // memory accesses are equivalent. - // Get the expression, if any, for the RHS of the MemoryDef. MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast(StoreAccess)->getDefiningAccess()); - const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); - // See if this store expression already has a value, and it's the same as our - // current store. FIXME: Right now, we only do this for simple stores. + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. if (SI->isSimple()) { + // Get the expression, if any, for the RHS of the MemoryDef. 
+ MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast(StoreAccess)->getDefiningAccess()); + const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); if (CC && CC->DefiningExpr && isa(CC->DefiningExpr) && CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B)) @@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { if (auto *I = dyn_cast(V)) { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { // If this is a MemoryDef, we need to update the equivalence table. If - // we - // determined the expression is congruent to a different memory state, - // use that different memory state. If we determined it didn't, we - // update - // that as well. Note that currently, we do not guarantee the - // "different" memory state dominates us. The goal is to make things - // that are congruent look congruent, not ensure we can eliminate one in - // favor of the other. - // Right now, the only way they can be equivalent is for store - // expresions. - if (!isa(MA)) { - if (E && isa(E) && EClass->Members.size() != 1) { - auto *DefAccess = cast(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa(MA) && isa(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } markMemoryUsersTouched(MA); } @@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { } else { // Handle terminators that return values. All of them produce values we // don't currently understand. - if (!I->getType()->isVoidTy()){ + if (!I->getType()->isVoidTy()) { auto *Symbolized = createUnknownExpression(I); performCongruenceFinding(I, Symbolized); } @@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() { continue; if (auto *FirstMUD = dyn_cast(KV.first)) { auto *SecondMUD = dyn_cast(KV.second); - if (FirstMUD && SecondMUD) { - auto *FirstInst = FirstMUD->getMemoryInst(); - auto *SecondInst = SecondMUD->getMemoryInst(); + if (FirstMUD && SecondMUD) assert( - ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) && + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst()) && "The instructions for these memory operations should have been in " "the same congruence class"); - } } else if (auto *FirstMP = dyn_cast(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have @@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, initializeCongruenceClasses(F); + unsigned int Iterations = 0; // We start out in the entry block. BasicBlock *LastBlock = &F.getEntryBlock(); while (TouchedInstructions.any()) { + ++Iterations; // Walk through all the instructions in all the blocks in RPO. 
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; InstrNum = TouchedInstructions.find_next(InstrNum)) { @@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, TouchedInstructions.reset(InstrNum); } } - -// FIXME: Move this to expensive checks when we are satisfied with NewGVN + NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); #ifndef NDEBUG verifyMemoryCongruency(); #endif @@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Cleanup the congruence class. SmallPtrSet MembersLeft; - for (Value * Member : CC->Members) { + for (Value *Member : CC->Members) { if (Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index a2ceded106b4..a40079ca8e76 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS, /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks -/// have different "unqiue scopes" at every call site. Were this not done, then +/// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index dc526a20c903..842cf31f2e3d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); uint64_t TrueWeight, FalseWeight; - uint64_t ExitWeight = 0, BackEdgeWeight = 0; + uint64_t ExitWeight = 0, CurHeaderWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight; + // The # of times the loop body executes is the sum of the exit block + // weight and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; } // For each peeled-off iteration, make a copy of the loop. @@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector NewBlocks; ValueToValueMapTy VMap; - // The exit weight of the previous iteration is the header entry weight - // of the current iteration. So this is exactly how many dynamic iterations - // the current peeled-off static iteration uses up. + // Subtract the exit weight from the current header weight -- the exit + // weight is exactly the weight of the previous iteration's header. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. 
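// [Editorial aside, not part of this patch] A minimal simulation of the
// weight bookkeeping above, with invented profile numbers: header weight 100
// and exit weight 10 (ten loop entries, roughly ten iterations each). Each
// peeled copy consumes one header execution per entry, and the final latch's
// backedge weight is whatever header weight remains minus one more exit.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t ExitWeight = 10, CurHeaderWeight = 100; // hypothetical profile
  for (unsigned Iter = 0; Iter != 3; ++Iter)       // peel three iterations
    CurHeaderWeight =
        ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  assert(CurHeaderWeight == 70);
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  assert(BackEdgeWeight == 60); // weights stay positive by construction
  return 0;
}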
- if (ExitWeight < BackEdgeWeight) - BackEdgeWeight -= ExitWeight; + if (ExitWeight < CurHeaderWeight) + CurHeaderWeight -= ExitWeight; else - BackEdgeWeight = 1; + CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, LI); @@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // Adjust the branch weights on the loop exit. if (ExitWeight) { + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. + uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3846b21c502e..54390e77bb1f 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // Update metadata and IR flags. + // The debug location for the "common" instruction is the merged locations of + // all the commoned instructions. We start with the original location of the + // "common" instruction and iteratively merge each location in the loop below. + DILocation *Loc = I0->getDebugLoc(); + + // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { + Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } + if (!isa(I0)) + I0->setDebugLoc(Loc); if (!isa(I0)) { // canSinkLastInstruction checked that all instructions were used by diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h index 47998859f674..e9d9ab03a8b0 100644 --- a/contrib/llvm/tools/clang/include/clang-c/Index.h +++ b/contrib/llvm/tools/clang/include/clang-c/Index.h @@ -2366,7 +2366,11 @@ enum CXCursorKind { */ CXCursor_OMPTargetTeamsDistributeParallelForDirective = 277, - CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForDirective, + /** \brief OpenMP target teams distribute parallel for simd directive. + */ + CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective = 278, + + CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective, /** * \brief Cursor that represents the translation unit itself. diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h index c0b0400cb88c..cbf0bee69f00 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2669,6 +2669,9 @@ DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeDirective, DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForSimdDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + // OpenMP clauses. 
template bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { diff --git a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h index 80300dae80df..61cae7b6d258 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h +++ b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h @@ -3638,6 +3638,79 @@ public: } }; +/// This represents '#pragma omp target teams distribute parallel for simd' +/// combined directive. +/// +/// \code +/// #pragma omp target teams distribute parallel for simd private(x) +/// \endcode +/// In this example directive '#pragma omp target teams distribute parallel +/// for simd' has clause 'private' with the variables 'x' +/// +class OMPTargetTeamsDistributeParallelForSimdDirective final + : public OMPLoopDirective { + friend class ASTStmtReader; + + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPTargetTeamsDistributeParallelForSimdDirective(SourceLocation StartLoc, + SourceLocation EndLoc, + unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, + OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + OMPD_target_teams_distribute_parallel_for_simd, + StartLoc, EndLoc, CollapsedNum, NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPTargetTeamsDistributeParallelForSimdDirective( + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective( + this, OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + OMPD_target_teams_distribute_parallel_for_simd, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPTargetTeamsDistributeParallelForSimdDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place for \a NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. 
+ /// + static OMPTargetTeamsDistributeParallelForSimdDirective * + CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == + OMPTargetTeamsDistributeParallelForSimdDirectiveClass; + } +}; + } // end namespace clang #endif diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index c4e4e2b60192..7f249c434ad6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3921,6 +3921,8 @@ def ext_ms_deref_template_argument: ExtWarn< def ext_ms_delayed_template_argument: ExtWarn< "using the undeclared type %0 as a default template argument is a " "Microsoft extension">, InGroup; +def err_template_arg_deduced_incomplete_pack : Error< + "deduced incomplete pack %0 for template parameter %1">; // C++ template specialization def err_template_spec_unknown_kind : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def index 808e0d2bb0c7..58b54ce0bcd6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def @@ -162,6 +162,9 @@ #ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE #define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) #endif +#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE +#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) +#endif // OpenMP directives. OPENMP_DIRECTIVE(threadprivate) @@ -214,6 +217,7 @@ OPENMP_DIRECTIVE_EXT(teams_distribute_parallel_for, "teams distribute parallel f OPENMP_DIRECTIVE_EXT(target_teams, "target teams") OPENMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute") OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distribute parallel for") +OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd") // OpenMP clauses. OPENMP_CLAUSE(if, OMPIfClause) @@ -793,6 +797,33 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind) OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule) OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear) +// Clauses allowed for OpenMP directive +// 'target teams distribute parallel for simd'. 
+OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(device) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(map) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(nowait) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(depend) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(defaultmap) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(is_device_ptr) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_teams) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(thread_limit) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen) + #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND @@ -843,3 +874,4 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear) #undef OPENMP_TARGET_TEAMS_CLAUSE #undef OPENMP_TARGET_TEAMS_DISTRIBUTE_CLAUSE #undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE +#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE diff --git a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td index d8eed553d86e..2e92e5006ff4 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td @@ -243,3 +243,4 @@ def OMPTeamsDistributeParallelForDirective : DStmt; def OMPTargetTeamsDirective : DStmt; def OMPTargetTeamsDistributeDirective : DStmt; def OMPTargetTeamsDistributeParallelForDirective : DStmt; +def OMPTargetTeamsDistributeParallelForSimdDirective : DStmt; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index 1f1222e10636..7f4e59a2d233 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -882,7 +882,7 @@ def fms_compatibility_version "(default))">; def fdelayed_template_parsing : Flag<["-"], "fdelayed-template-parsing">, Group, HelpText<"Parse templated function definitions at the end of the " - "translation unit">, Flags<[CC1Option]>; + "translation unit">, Flags<[CC1Option, CoreOption]>; def fms_memptr_rep_EQ : Joined<["-"], "fms-memptr-rep=">, Group, Flags<[CC1Option]>; def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group, Flags<[DriverOption, CC1Option]>, MetaVarName<"">, @@ -1031,7 +1031,8 @@ def fno_ms_extensions : Flag<["-"], "fno-ms-extensions">, Group, Flags<[CoreOption]>; def fno_ms_compatibility : Flag<["-"], "fno-ms-compatibility">, Group, 
   Flags<[CoreOption]>;
-def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>;
+def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>,
+  Flags<[DriverOption, CoreOption]>;
 def fno_objc_exceptions: Flag<["-"], "fno-objc-exceptions">, Group<f_Group>;
 def fno_objc_legacy_dispatch : Flag<["-"], "fno-objc-legacy-dispatch">, Group<f_Group>;
 def fno_objc_weak : Flag<["-"], "fno-objc-weak">, Group<f_Group>, Flags<[CC1Option]>;
@@ -1331,6 +1332,12 @@ def funique_section_names : Flag <["-"], "funique-section-names">,
   Group<f_Group>, Flags<[CC1Option]>;
 def fno_unique_section_names : Flag <["-"], "fno-unique-section-names">,
   Group<f_Group>, Flags<[CC1Option]>;
+def fstrict_return : Flag<["-"], "fstrict-return">, Group<f_Group>,
+  Flags<[CC1Option]>,
+  HelpText<"Always treat control flow paths that fall off the end of a "
+           "non-void function as unreachable">;
+def fno_strict_return : Flag<["-"], "fno-strict-return">, Group<f_Group>,
+  Flags<[CC1Option]>;
 def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
index 1f0c83b5bfa7..54c9f81265a6 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
@@ -251,6 +251,10 @@ CODEGENOPT(DiagnosticsWithHotness, 1, 0)
 /// Whether copy relocations support is available when building as PIE.
 CODEGENOPT(PIECopyRelocations, 1, 0)

+/// Whether we should use the undefined behaviour optimization for control flow
+/// paths that reach the end of a function without executing a required return.
+CODEGENOPT(StrictReturn, 1, 1)
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
index 82caaeb24ae7..f0999f68470c 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
@@ -8514,6 +8514,12 @@ public:
       ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
       SourceLocation EndLoc,
       llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
+  /// Called on well-formed '\#pragma omp target teams distribute parallel for
+  /// simd' after parsing of the associated statement.
+  StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
+      ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+      SourceLocation EndLoc,
+      llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);

   /// Checks correctness of linear modifiers.
 bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
index 861fe64096af..61e2f18045ea 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1516,6 +1516,7 @@ namespace clang {
       STMT_OMP_TARGET_TEAMS_DIRECTIVE,
       STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE,
       STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,
+      STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE,
       EXPR_OMP_ARRAY_SECTION,

       // ARC
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h
new file mode 100644
index 000000000000..d657f16df183
--- /dev/null
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h
@@ -0,0 +1,100 @@
+//===--- Diagnostic.h - Framework for clang diagnostics tools --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  Structures supporting diagnostics and refactorings that span multiple
+//  translation units. They indicate diagnostic reports and replacement
+//  suggestions for the analyzed sources.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
+#define LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
+
+#include "Replacement.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+namespace tooling {
+
+/// \brief Represents a diagnostic message with the associated error message
+/// and information on the location of the problem.
+struct DiagnosticMessage {
+  DiagnosticMessage(llvm::StringRef Message = "");
+
+  /// \brief Constructs a diagnostic message with an offset to the diagnostic
+  /// within the file where the problem occurred.
+  ///
+  /// \param Loc Should be a file location, it is not meaningful for a macro
+  /// location.
+  ///
+  DiagnosticMessage(llvm::StringRef Message, const SourceManager &Sources,
+                    SourceLocation Loc);
+  std::string Message;
+  std::string FilePath;
+  unsigned FileOffset;
+};
+
+/// \brief Represents the diagnostic with the level of severity and possible
+/// fixes to be applied.
+struct Diagnostic {
+  enum Level {
+    Warning = DiagnosticsEngine::Warning,
+    Error = DiagnosticsEngine::Error
+  };
+
+  Diagnostic() = default;
+
+  Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
+             StringRef BuildDirectory);
+
+  Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
+             llvm::StringMap<Replacements> &Fix,
+             SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
+             llvm::StringRef BuildDirectory);
+
+  /// \brief Name identifying the Diagnostic.
+  std::string DiagnosticName;
+
+  /// \brief Message associated with the diagnostic.
+  DiagnosticMessage Message;
+
+  /// \brief Fixes to apply, grouped by file path.
+  llvm::StringMap<Replacements> Fix;
+
+  /// \brief Potential notes about the diagnostic.
+  SmallVector<DiagnosticMessage, 1> Notes;
+
+  /// \brief Diagnostic level. Can indicate either an error or a warning.
+  Level DiagLevel;
+
+  /// \brief A build directory of the diagnostic source file.
+  ///
+  /// It's an absolute path which is the `directory` field of the source file
+  /// in the compilation database. If users don't specify the compilation
+  /// database directory, it is the current directory where clang-tidy runs.
+  ///
+  /// Note: it is empty in unit tests.
+  std::string BuildDirectory;
+};
+
+/// \brief Collection of Diagnostics generated from a single translation unit.
+struct TranslationUnitDiagnostics {
+  /// Name of the main source for the translation unit.
+  std::string MainSourceFile;
+  std::vector<Diagnostic> Diagnostics;
+};
+
+} // end namespace tooling
+} // end namespace clang
+#endif // LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
index 95dc3cd6e763..8d4a22adf368 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
@@ -329,12 +329,6 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
 struct TranslationUnitReplacements {
   /// Name of the main source for the translation unit.
   std::string MainSourceFile;
-
-  /// A freeform chunk of text to describe the context of the replacements.
-  /// Will be printed, for example, when detecting conflicts during replacement
-  /// deduplication.
-  std::string Context;
-
   std::vector<Replacement> Replacements;
 };
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
new file mode 100644
index 000000000000..f32b9fa9c94b
--- /dev/null
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
@@ -0,0 +1,101 @@
+//===-- DiagnosticsYaml.h -- Serialization for Diagnostics ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the structure of a YAML document for serializing
+/// diagnostics.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
+#define LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
+
+#include "clang/Tooling/Core/Diagnostic.h"
+#include "clang/Tooling/ReplacementsYaml.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <string>
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::Diagnostic)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<clang::tooling::Diagnostic> {
+  /// \brief Helper to (de)serialize a Diagnostic since we don't have direct
+  /// access to its data members.
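// The helper below relies on llvm::yaml's generic normalization idiom: when a
// type can't be mapped field-by-field, MappingTraits routes I/O through a
// proxy object that can, converting on the way in and out. A self-contained
// sketch with a toy type (Celsius and NormalizedFahrenheit are hypothetical;
// only the YAMLTraits machinery itself is real):
//
//   #include "llvm/Support/YAMLTraits.h"
//
//   struct Celsius { double Degrees; };
//
//   namespace llvm {
//   namespace yaml {
//   template <> struct MappingTraits<Celsius> {
//     struct NormalizedFahrenheit {
//       NormalizedFahrenheit(const IO &) : Degrees(0) {}
//       NormalizedFahrenheit(const IO &, const Celsius &C)
//           : Degrees(C.Degrees * 9 / 5 + 32) {}
//       Celsius denormalize(const IO &) { return {(Degrees - 32) * 5 / 9}; }
//       double Degrees;
//     };
//     static void mapping(IO &Io, Celsius &C) {
//       // Converts to the proxy before writing and back after reading.
//       MappingNormalization<NormalizedFahrenheit, Celsius> Keys(Io, C);
//       Io.mapRequired("fahrenheit", Keys->Degrees);
//     }
//   };
//   } // end namespace yaml
//   } // end namespace llvm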
+  class NormalizedDiagnostic {
+  public:
+    NormalizedDiagnostic(const IO &)
+        : DiagLevel(clang::tooling::Diagnostic::Level::Warning) {}
+
+    NormalizedDiagnostic(const IO &, const clang::tooling::Diagnostic &D)
+        : DiagnosticName(D.DiagnosticName), Message(D.Message), Fix(D.Fix),
+          Notes(D.Notes), DiagLevel(D.DiagLevel),
+          BuildDirectory(D.BuildDirectory) {}
+
+    clang::tooling::Diagnostic denormalize(const IO &) {
+      return clang::tooling::Diagnostic(DiagnosticName, Message, Fix, Notes,
+                                        DiagLevel, BuildDirectory);
+    }
+
+    std::string DiagnosticName;
+    clang::tooling::DiagnosticMessage Message;
+    llvm::StringMap<clang::tooling::Replacements> Fix;
+    SmallVector<clang::tooling::DiagnosticMessage, 1> Notes;
+    clang::tooling::Diagnostic::Level DiagLevel;
+    std::string BuildDirectory;
+  };
+
+  static void mapping(IO &Io, clang::tooling::Diagnostic &D) {
+    MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
+        Io, D);
+    Io.mapRequired("DiagnosticName", Keys->DiagnosticName);
+
+    // FIXME: Export properly all the different fields.
+
+    std::vector<clang::tooling::Replacement> Fixes;
+    for (auto &Replacements : Keys->Fix) {
+      for (auto &Replacement : Replacements.second) {
+        Fixes.push_back(Replacement);
+      }
+    }
+    Io.mapRequired("Replacements", Fixes);
+    for (auto &Fix : Fixes) {
+      llvm::Error Err = Keys->Fix[Fix.getFilePath()].add(Fix);
+      if (Err) {
+        // FIXME: Implement better conflict handling.
+        llvm::errs() << "Fix conflicts with existing fix: "
+                     << llvm::toString(std::move(Err)) << "\n";
+      }
+    }
+  }
+};
+
+/// \brief Specialized MappingTraits to describe how a
+/// TranslationUnitDiagnostics is (de)serialized.
+template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
+  static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
+    Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
+
+    std::vector<clang::tooling::Diagnostic> Diagnostics;
+    for (auto &Diagnostic : Doc.Diagnostics) {
+      // FIXME: Export all diagnostics, not just the ones with fixes.
+      // Update MappingTraits<clang::tooling::Diagnostic>::mapping.
+      if (Diagnostic.Fix.size() > 0) {
+        Diagnostics.push_back(Diagnostic);
+      }
+    }
+    Io.mapRequired("Diagnostics", Diagnostics);
+    Doc.Diagnostics = Diagnostics;
+  }
+};
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h b/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
index 47b7f3f9a534..0b1dc4c77423 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
@@ -65,7 +65,6 @@ template <> struct MappingTraits<clang::tooling::TranslationUnitReplacements> {
   static void mapping(IO &Io,
                       clang::tooling::TranslationUnitReplacements &Doc) {
     Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
-    Io.mapOptional("Context", Doc.Context, std::string());
     Io.mapRequired("Replacements", Doc.Replacements);
   }
 };
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
index b3f8925b6464..6dcb705c44d3 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
@@ -7192,6 +7192,12 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
                                CharUnits &EndOffset) {
   bool DetermineForCompleteObject = refersToCompleteObject(LVal);

+  auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
+    if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
+      return false;
+    return HandleSizeof(Info, ExprLoc, Ty, Result);
+  };
+
   // We want to evaluate the size of the entire object.
This is a valid fallback // for when Type=1 and the designator is invalid, because we're asked for an // upper-bound. @@ -7209,7 +7215,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, return false; QualType BaseTy = getObjectType(LVal.getLValueBase()); - return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset); + return CheckedHandleSizeof(BaseTy, EndOffset); } // We want to evaluate the size of a subobject. @@ -7238,7 +7244,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, } CharUnits BytesPerElem; - if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem)) + if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem)) return false; // According to the GCC documentation, we want the size of the subobject diff --git a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp index 0a90740162b9..a7c71bb5f45c 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp @@ -1659,3 +1659,64 @@ OMPTargetTeamsDistributeParallelForDirective::CreateEmpty(const ASTContext &C, return new (Mem) OMPTargetTeamsDistributeParallelForDirective(CollapsedNum, NumClauses); } + +OMPTargetTeamsDistributeParallelForSimdDirective * +OMPTargetTeamsDistributeParallelForSimdDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + auto Size = + llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, + OMPD_target_teams_distribute_parallel_for_simd)); + OMPTargetTeamsDistributeParallelForSimdDirective *Dir = + new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPTargetTeamsDistributeParallelForSimdDirective * +OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty( + const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, + EmptyShell) { + auto Size = + llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, + OMPD_target_teams_distribute_parallel_for_simd)); + return new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective( + 
CollapsedNum, NumClauses); +} + diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp index a8f493dca07d..a9c64c3ba6ae 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp @@ -1244,6 +1244,12 @@ void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective( PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *Node) { + Indent() << "#pragma omp target teams distribute parallel for simd "; + PrintOMPExecutableDirective(Node); +} + //===----------------------------------------------------------------------===// // Expr printing methods. //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp index dd59a9b96c98..df36bf06b843 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp @@ -763,6 +763,11 @@ void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForDirective( VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + const OMPTargetTeamsDistributeParallelForSimdDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitExpr(const Expr *S) { VisitStmt(S); } diff --git a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp index bf3cc05cdb6e..a1a463f1d037 100644 --- a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp +++ b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp @@ -1690,15 +1690,19 @@ CFGBuilder::VisitLogicalOperator(BinaryOperator *B, // we have been provided. ExitBlock = RHSBlock = createBlock(false); + // Even though KnownVal is only used in the else branch of the next + // conditional, tryEvaluateBool performs additional checking on the + // Expr, so it should be called unconditionally. 
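// Concretely, KnownVal lets the builder prune one outgoing edge of the RHS
// block when the right-hand side (or the whole expression) folds to a
// constant. A small function that exercises this path (illustrative only,
// not part of the patch):
//
//   void cfg_example(int x, int *p) {
//     if (x > 0 && 0) // RHS folds to 'false', so the true successor of the
//       *p = 1;       // RHS block is recorded as unreachable via KnownVal
//   }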
+ TryResult KnownVal = tryEvaluateBool(RHS); + if (!KnownVal.isKnown()) + KnownVal = tryEvaluateBool(B); + if (!Term) { assert(TrueBlock == FalseBlock); addSuccessor(RHSBlock, TrueBlock); } else { RHSBlock->setTerminator(Term); - TryResult KnownVal = tryEvaluateBool(RHS); - if (!KnownVal.isKnown()) - KnownVal = tryEvaluateBool(B); addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse()); addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue()); } diff --git a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp index 4675995ea722..7bd1f8762bff 100644 --- a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp @@ -685,6 +685,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, #define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \ case OMPC_##Name: \ return true; +#include "clang/Basic/OpenMPKinds.def" + default: + break; + } + break; + case OMPD_target_teams_distribute_parallel_for_simd: + switch (CKind) { +#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; #include "clang/Basic/OpenMPKinds.def" default: break; @@ -721,7 +731,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { @@ -735,8 +746,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || - DKind == OMPD_target_teams_distribute_parallel_for; - // TODO add next directives. + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { @@ -752,8 +763,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_teams_distribute_parallel_for_simd || - DKind == OMPD_target_teams_distribute_parallel_for; - // TODO add next directives. 
+ DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { @@ -761,7 +772,8 @@ bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for || DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) { @@ -779,7 +791,8 @@ bool clang::isOpenMPNestingTeamsDirective(OpenMPDirectiveKind DKind) { bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) { return isOpenMPNestingTeamsDirective(DKind) || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) { @@ -788,8 +801,8 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_distribute_parallel_for_simd || DKind == OMPD_distribute_simd || DKind == OMPD_target_simd || DKind == OMPD_teams_distribute_simd || - DKind == OMPD_teams_distribute_parallel_for_simd; - // TODO add next directives. + DKind == OMPD_teams_distribute_parallel_for_simd || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) { @@ -805,7 +818,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) { Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute || - Kind == OMPD_target_teams_distribute_parallel_for; + Kind == OMPD_target_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) { @@ -830,5 +844,6 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute || - Kind == OMPD_target_teams_distribute_parallel_for; + Kind == OMPD_target_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for_simd; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 451f9e9221ad..fe0e2acdfdbf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -20,53 +20,64 @@ using namespace clang; using namespace CodeGen; -/// \brief Get the GPU warp size. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) { +namespace { +enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init, +}; + +// NVPTX Address space +enum AddressSpace { + AddressSpaceShared = 3, +}; +} // namespace + +/// Get the GPU warp size. 
+static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), llvm::None, "nvptx_warp_size"); } -/// \brief Get the id of the current thread on the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) { +/// Get the id of the current thread on the GPU. +static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), llvm::None, "nvptx_tid"); } -// \brief Get the maximum number of threads in a block of the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) { +/// Get the maximum number of threads in a block of the GPU. +static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), llvm::None, "nvptx_num_threads"); } -/// \brief Get barrier to synchronize all threads in a block. -void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) { +/// Get barrier to synchronize all threads in a block. +static void getNVPTXCTABarrier(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; Bld.CreateCall(llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } -// \brief Synchronize all GPU threads in a block. -void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { - getNVPTXCTABarrier(CGF); -} +/// Synchronize all GPU threads in a block. +static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } -/// \brief Get the thread id of the OMP master thread. +/// Get the thread id of the OMP master thread. /// The master thread id is the first thread (lane) of the last warp in the /// GPU block. Warp size is assumed to be some power of 2. /// Thread id is 0 indexed. /// E.g: If NumThreads is 33, master id is 32. /// If NumThreads is 64, master id is 32. /// If NumThreads is 1024, master id is 992. 
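// The computation described above is a small bitmask calculation: round
// NumThreads - 1 down to a multiple of the warp size. The same arithmetic as
// plain C++ (a sketch; assumes WarpSize is a power of two, as the comment
// requires):

static unsigned masterThreadId(unsigned NumThreads, unsigned WarpSize) {
  unsigned Mask = WarpSize - 1;    // e.g. 31 when warps have 32 lanes
  return (NumThreads - 1) & ~Mask; // first lane of the last warp
}

// masterThreadId(33, 32) == 32, masterThreadId(64, 32) == 32, and
// masterThreadId(1024, 32) == 992, matching the examples above.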
-llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { +static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreads = getNVPTXNumThreads(CGF); @@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { Bld.CreateNot(Mask), "master_tid"); } -namespace { -enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, - /// kmp_int32 thread_limit); - OMPRTL_NVPTX__kmpc_kernel_init, -}; - -// NVPTX Address space -enum ADDRESS_SPACE { - ADDRESS_SPACE_SHARED = 3, -}; -} // namespace - CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) { @@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() { CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0, - llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty)); WorkID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0, - llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty)); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e18d28cdda9f..a33fb27579f6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -49,38 +49,6 @@ public: void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); private: - // - // NVPTX calls. - // - - /// \brief Get the GPU warp size. - llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); - - /// \brief Get the id of the current thread on the GPU. - llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF); - - // \brief Get the maximum number of threads in a block of the GPU. - llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF); - - /// \brief Get barrier to synchronize all threads in a block. - void getNVPTXCTABarrier(CodeGenFunction &CGF); - - // \brief Synchronize all GPU threads in a block. - void syncCTAThreads(CodeGenFunction &CGF); - - // - // OMP calls. - // - - /// \brief Get the thread id of the OMP master thread. - /// The master thread id is the first thread (lane) of the last warp in the - /// GPU block. Warp size is assumed to be some power of 2. - /// Thread id is 0 indexed. - /// E.g: If NumThreads is 33, master id is 32. - /// If NumThreads is 64, master id is 32. - /// If NumThreads is 1024, master id is 992. - llvm::Value *getMasterThreadID(CodeGenFunction &CGF); - // // Private state and methods. 
 //
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
index f2acb798b881..8d391f95d9f7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
     EmitOMPTargetTeamsDistributeParallelForDirective(
         cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
     break;
+  case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+    EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+        cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+    break;
   }
 }
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ba39e1fbd41f..386c4f0fe69c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
       });
 }

+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+  CGM.getOpenMPRuntime().emitInlinedDirective(
+      *this, OMPD_target_teams_distribute_parallel_for_simd,
+      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+        CGF.EmitStmt(
+            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+      });
+}
+
 /// \brief Emit a helper variable and return corresponding lvalue.
 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                                const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
   auto &RT = CGM.getOpenMPRuntime();

+  bool HasLastprivateClause = false;
   // Check pre-condition.
   {
     OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

     OMPPrivateScope LoopScope(*this);
+    if (EmitOMPFirstprivateClause(S, LoopScope)) {
+      // Emit implicit barrier to synchronize threads and avoid data races on
+      // initialization of firstprivate variables and post-update of
+      // lastprivate variables.
+      CGM.getOpenMPRuntime().emitBarrierCall(
+          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+          /*ForceSimpleCall=*/true);
+    }
+    EmitOMPPrivateClause(S, LoopScope);
+    HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
     EmitOMPPrivateLoopCounters(S, LoopScope);
     (void)LoopScope.Privatize();
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
             LB.getAddress(), UB.getAddress(), ST.getAddress(),
             IL.getAddress(), Chunk);
       }
+
+      // Emit final copy of the lastprivate variables if IsLastIter != 0.
+      if (HasLastprivateClause)
+        EmitOMPLastprivateClauseFinal(
+            S, /*NoFinals=*/false,
+            Builder.CreateIsNotNull(
+                EmitLoadOfScalar(IL, S.getLocStart())));
     }

     // We're now done with the loop, so jump to the continuation block.
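// At the source level, the firstprivate barrier and the guarded lastprivate
// copy added above are what make a loop like the following behave
// (illustrative example, not taken from the patch):

void distribute_lastprivate(int *out, int n) {
  int last = -1;
#pragma omp target teams distribute lastprivate(last)
  for (int i = 0; i < n; ++i)
    last = i; // every team writes its own private copy ...

  // ... and only the copy from the sequentially-last iteration survives,
  // written back under the IsLastIter (IL) flag tested above.
  *out = last;
}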
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index a954f487d1e4..7cab13de923b 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
   return ResTy;
 }

+static bool
+shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
+                                             const ASTContext &Context) {
+  QualType T = FD->getReturnType();
+  // Avoid the optimization for functions that return a record type with a
+  // trivial destructor or another trivially copyable type.
+  if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+      return !ClassDecl->hasTrivialDestructor();
+  }
+  return !T.isTriviallyCopyableType(Context);
+}
+
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@
   // function call is used by the caller, the behavior is undefined.
   if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
       !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
+    bool ShouldEmitUnreachable =
+        CGM.getCodeGenOpts().StrictReturn ||
+        shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
     if (SanOpts.has(SanitizerKind::Return)) {
       SanitizerScope SanScope(this);
       llvm::Value *IsFalse = Builder.getFalse();
       EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
                 SanitizerHandler::MissingReturn,
                 EmitCheckSourceLocation(FD->getLocation()), None);
-    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-      EmitTrapCall(llvm::Intrinsic::trap);
+    } else if (ShouldEmitUnreachable) {
+      if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+        EmitTrapCall(llvm::Intrinsic::trap);
+    }
+    if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
+      Builder.CreateUnreachable();
+      Builder.ClearInsertionPoint();
     }
-    Builder.CreateUnreachable();
-    Builder.ClearInsertionPoint();
   }

   // Emit the standard function epilogue.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
index 222d0e97968a..1347f54df9ac 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -2699,6 +2699,8 @@ public:
       const OMPTargetTeamsDistributeDirective &S);
   void EmitOMPTargetTeamsDistributeParallelForDirective(
       const OMPTargetTeamsDistributeParallelForDirective &S);
+  void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+      const OMPTargetTeamsDistributeParallelForSimdDirective &S);

   /// Emit outlined function for the target directive.
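// Observable effect of the change above (sketch): falling off the end of a
// value-returning function is undefined behavior in C++, and with
// -fstrict-return (the default) that path now lowers to 'unreachable', plus a
// trap at -O0. Under -fno-strict-return the lowering is still kept when
// shouldUseUndefinedBehaviorReturnOptimization() returns true, i.e. for
// return types where falling off the end could not have worked anyway
// (records with non-trivial destructors and other non-trivially-copyable
// types).

int missing_return(int x) { // illustrative; trivially copyable return type
  if (x > 0)
    return 1;
} // falling off this closing brace is the path in question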
static std::pairgetType()); EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order); } diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp index 2a367bb29aa5..ea5ad7d051b6 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp @@ -2235,6 +2235,15 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, UseSeparateSections)) { CmdArgs.push_back("-plugin-opt=-data-sections"); } + + if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { + StringRef FName = A->getValue(); + if (!llvm::sys::fs::exists(FName)) + D.Diag(diag::err_drv_no_such_file) << FName; + else + CmdArgs.push_back( + Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName)); + } } /// This is a helper function for validating the optional refinement step @@ -3058,6 +3067,10 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, continue; } + if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() && + Value == "-mbig-obj") + continue; // LLVM handles bigobj automatically + switch (C.getDefaultToolChain().getArch()) { default: break; @@ -4453,6 +4466,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, false)) CmdArgs.push_back("-fstrict-enums"); + if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return, + true)) + CmdArgs.push_back("-fno-strict-return"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, options::OPT_fno_strict_vtable_pointers, false)) diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 70b90d6fa14e..389761d48249 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -638,6 +638,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.BreakAfterJavaFieldAnnotations = true; ChromiumStyle.ContinuationIndentWidth = 8; ChromiumStyle.IndentWidth = 4; + } else if (Language == FormatStyle::LK_JavaScript) { + ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; + ChromiumStyle.AllowShortLoopsOnASingleLine = false; } else { ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 84e06d05c739..370cf7afa330 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -1255,10 +1255,13 @@ void UnwrappedLineParser::tryToParseJSFunction() { if (FormatTok->is(tok::l_brace)) tryToParseBracedList(); else - while (FormatTok->isNot(tok::l_brace) && !eof()) + while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) nextToken(); } + if (FormatTok->is(tok::semi)) + return; + parseChildBlock(); } diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp index bd2ee06d1653..d8118cb30f63 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp @@ -370,6 +370,26 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC, break; } + case Decl::ClassTemplateSpecialization: { + const auto *CTSD = cast(DC); + if 
(CTSD->isCompleteDefinition())
+      Out << "[class template specialization] ";
+    else
+      Out << "<class template specialization> ";
+    Out << *CTSD;
+    break;
+  }
+
+  case Decl::ClassTemplatePartialSpecialization: {
+    const auto *CTPSD = cast<ClassTemplatePartialSpecializationDecl>(DC);
+    if (CTPSD->isCompleteDefinition())
+      Out << "[class template partial specialization] ";
+    else
+      Out << "<class template partial specialization> ";
+    Out << *CTPSD;
+    break;
+  }
+
   default:
     llvm_unreachable("a decl that inherits DeclContext isn't handled");
   }
@@ -400,7 +420,8 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
     case Decl::CXXConstructor:
     case Decl::CXXDestructor:
     case Decl::CXXConversion:
-    {
+    case Decl::ClassTemplateSpecialization:
+    case Decl::ClassTemplatePartialSpecialization: {
       DeclContext* DC = cast<DeclContext>(I);
       PrintDeclContext(DC, Indentation+2);
       break;
     }
@@ -478,6 +499,37 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
       Out << " " << '"' << I << "\"\n";
       break;
     }
+    case Decl::Friend: {
+      Out << "<friend>";
+      if (const NamedDecl *ND = cast<FriendDecl>(I)->getFriendDecl())
+        Out << ' ' << *ND;
+      Out << "\n";
+      break;
+    }
+    case Decl::Using: {
+      Out << "<using> " << *cast<UsingDecl>(I) << "\n";
+      break;
+    }
+    case Decl::UsingShadow: {
+      Out << "<using shadow> " << *cast<UsingShadowDecl>(I) << "\n";
+      break;
+    }
+    case Decl::Empty: {
+      Out << "<empty>\n";
+      break;
+    }
+    case Decl::AccessSpec: {
+      Out << "<access specifier>\n";
+      break;
+    }
+    case Decl::VarTemplate: {
+      Out << "<var template> " << *cast<VarTemplateDecl>(I) << "\n";
+      break;
+    }
+    case Decl::StaticAssert: {
+      Out << "<static assert>\n";
+      break;
+    }
     default:
       Out << "DeclKind: " << DK << '"' << I << "\"\n";
       llvm_unreachable("decl unhandled");
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
index a0682e26e702..ca4a7655a37d 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -602,6 +602,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
   Opts.SoftFloat = Args.hasArg(OPT_msoft_float);
   Opts.StrictEnums = Args.hasArg(OPT_fstrict_enums);
+  Opts.StrictReturn = !Args.hasArg(OPT_fno_strict_return);
   Opts.StrictVTablePointers = Args.hasArg(OPT_fstrict_vtable_pointers);
   Opts.UnsafeFPMath = Args.hasArg(OPT_menable_unsafe_fp_math) ||
                       Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 205e15b40b5d..931d44b6965b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -232,6 +232,11 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
 // hardware, seems to generate faster machine code because ptxas can more easily
 // reason about our code.

+#if CUDA_VERSION >= 8000
+#include "sm_60_atomic_functions.hpp"
+#include "sm_61_intrinsics.hpp"
+#endif
+
 #undef __MATH_FUNCTIONS_HPP__

 // math_functions.hpp defines ::signbit as a __host__ __device__ function.
This diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp index 061721dfb8da..f9ea8af00f50 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp @@ -119,7 +119,8 @@ static OpenMPDirectiveKind ParseOpenMPDirectiveKind(Parser &P) { { OMPD_target, OMPD_teams, OMPD_target_teams }, { OMPD_target_teams, OMPD_distribute, OMPD_target_teams_distribute }, { OMPD_target_teams_distribute, OMPD_parallel, OMPD_target_teams_distribute_parallel }, - { OMPD_target_teams_distribute_parallel, OMPD_for, OMPD_target_teams_distribute_parallel_for } + { OMPD_target_teams_distribute_parallel, OMPD_for, OMPD_target_teams_distribute_parallel_for }, + { OMPD_target_teams_distribute_parallel_for, OMPD_simd, OMPD_target_teams_distribute_parallel_for_simd } }; enum { CancellationPoint = 0, DeclareReduction = 1, TargetData = 2 }; auto Tok = P.getCurToken(); @@ -758,6 +759,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: Diag(Tok, diag::err_omp_unexpected_directive) << getOpenMPDirectiveName(DKind); break; @@ -796,7 +798,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// 'teams distribute parallel for simd' | /// 'teams distribute parallel for' | 'target teams' | /// 'target teams distribute' | -/// 'target teams distribute parallel for' {clause} +/// 'target teams distribute parallel for' | +/// 'target teams distribute parallel for simd' {clause} /// annot_pragma_openmp_end /// StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( @@ -912,7 +915,8 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_teams_distribute_parallel_for: case OMPD_target_teams: case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_parallel_for: { + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { ConsumeToken(); // Parse directive name of the 'critical' directive if any. if (DKind == OMPD_critical) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp index 6f272ec839f5..282633bbc9e1 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp @@ -228,10 +228,8 @@ void Sema::EraseUnwantedCUDAMatches( [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); // Erase all functions with lower priority. 
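// llvm::erase_if, used in the rewrite below, is shorthand for the classic
// erase/remove_if dance; the two spellings are equivalent:

#include "llvm/ADT/STLExtras.h"
#include <vector>

void erase_if_example() {
  std::vector<int> V = {1, 2, 3, 4};
  // Before: V.erase(std::remove_if(V.begin(), V.end(), Pred), V.end());
  llvm::erase_if(V, [](int N) { return N % 2 == 0; }); // leaves {1, 3}
}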
- Matches.erase( - llvm::remove_if( - Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }), - Matches.end()); + llvm::erase_if(Matches, + [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); } /// When an implicitly-declared special member has to invoke more than one diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp index d76bde574677..3eef366b75b3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp @@ -7471,6 +7471,23 @@ void Sema::CodeCompleteObjCMethodDeclSelector(Scope *S, } Results.ExitScope(); + + if (!AtParameterName && !SelIdents.empty() && + SelIdents.front()->getName().startswith("init")) { + for (const auto &M : PP.macros()) { + if (M.first->getName() != "NS_DESIGNATED_INITIALIZER") + continue; + Results.EnterNewScope(); + CodeCompletionBuilder Builder(Results.getAllocator(), + Results.getCodeCompletionTUInfo()); + Builder.AddTypedTextChunk( + Builder.getAllocator().CopyString(M.first->getName())); + Results.AddResult(CodeCompletionResult(Builder.TakeString(), CCP_Macro, + CXCursor_MacroDefinition)); + Results.ExitScope(); + } + } + HandleCodeCompleteResults(this, CodeCompleter, CodeCompletionContext::CCC_Other, Results.data(),Results.size()); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index 804aadc0ff77..edceb537df75 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -1700,7 +1700,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_teams_distribute_parallel_for: case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_parallel_for: { + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); @@ -2439,6 +2440,12 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( AllowedNameModifiers.push_back(OMPD_target); AllowedNameModifiers.push_back(OMPD_parallel); break; + case OMPD_target_teams_distribute_parallel_for_simd: + Res = ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( + ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); + AllowedNameModifiers.push_back(OMPD_target); + AllowedNameModifiers.push_back(OMPD_parallel); + break; case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_threadprivate: @@ -6375,6 +6382,52 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, + llvm::DenseMap &VarsWithImplicitDSA) { + if (!AStmt) + return StmtError(); + + CapturedStmt *CS = cast(AStmt); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. 
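// For reference, the structured-block rule quoted above rejects code that
// branches out of the associated statement (illustrative, deliberately
// ill-formed):
//
//   #pragma omp target teams distribute parallel for simd
//   for (int i = 0; i < n; ++i) {
//     if (stop(i))      // 'stop' is a hypothetical predicate
//       goto done;      // error: jumps out of the structured block
//   }
//   done:;
//
// Exceptions and longjmp must likewise not cross the block boundary, which is
// what setNothrow() on the captured declaration records below.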
+ CS->getCapturedDecl()->setNothrow(); + + OMPLoopDirective::HelperExprs B; + // In presence of clause 'collapse' with number of loops, it will + // define the nested loops number. + auto NestedLoopCount = CheckOpenMPLoop( + OMPD_target_teams_distribute_parallel_for_simd, + getCollapseNumberExpr(Clauses), + nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack, + VarsWithImplicitDSA, B); + if (NestedLoopCount == 0) + return StmtError(); + + assert((CurContext->isDependentContext() || B.builtAll()) && + "omp target teams distribute parallel for simd loop exprs were not " + "built"); + + if (!CurContext->isDependentContext()) { + // Finalize the clauses that need pre-built expressions for CodeGen. + for (auto C : Clauses) { + if (auto *LC = dyn_cast(C)) + if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), + B.NumIterations, *this, CurScope, + DSAStack)) + return StmtError(); + } + } + + getCurFunction()->setHasBranchProtectedScope(); + return OMPTargetTeamsDistributeParallelForSimdDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); +} + OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -7397,7 +7450,8 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel || CurrDir == OMPD_target_teams || CurrDir == OMPD_target_teams_distribute || - CurrDir == OMPD_target_teams_distribute_parallel_for) { + CurrDir == OMPD_target_teams_distribute_parallel_for || + CurrDir == OMPD_target_teams_distribute_parallel_for_simd) { OpenMPClauseKind ConflictKind; if (DSAStack->checkMappableExprComponentListsForDecl( VD, /*CurrentRegionOnly=*/true, @@ -7657,7 +7711,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel || CurrDir == OMPD_target_teams || CurrDir == OMPD_target_teams_distribute || - CurrDir == OMPD_target_teams_distribute_parallel_for) { + CurrDir == OMPD_target_teams_distribute_parallel_for || + CurrDir == OMPD_target_teams_distribute_parallel_for_simd) { OpenMPClauseKind ConflictKind; if (DSAStack->checkMappableExprComponentListsForDecl( VD, /*CurrentRegionOnly=*/true, @@ -10175,7 +10230,8 @@ checkMappableExpressionList(Sema &SemaRef, DSAStackTy *DSAS, // attribute clause on the same construct if ((DKind == OMPD_target || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for) && VD) { + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd) && VD) { auto DVar = DSAS->getTopDSA(VD, false); if (isOpenMPPrivate(DVar.CKind)) { SemaRef.Diag(ELoc, diag::err_omp_variable_in_given_clause_and_dsa) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp index 47e3df20d911..b5c0e634fa50 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp @@ -8958,9 +8958,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, S.IdentifyCUDAPreference(Caller, Cand->Function) == Sema::CFP_WrongSide; }; - Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(), - IsWrongSideCandidate), - Candidates.end()); + llvm::erase_if(Candidates, IsWrongSideCandidate); } } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp 
b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp index facc5d1b375b..66a10ef7993e 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp @@ -4244,7 +4244,7 @@ namespace { UnnamedLocalNoLinkageFinder(Sema &S, SourceRange SR) : S(S), SR(SR) { } bool Visit(QualType T) { - return inherited::Visit(T.getTypePtr()); + return T.isNull() ? false : inherited::Visit(T.getTypePtr()); } #define TYPE(Class, Parent) \ @@ -4497,17 +4497,7 @@ bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param, // // C++11 allows these, and even in C++03 we allow them as an extension with // a warning. - bool NeedsCheck; - if (LangOpts.CPlusPlus11) - NeedsCheck = - !Diags.isIgnored(diag::warn_cxx98_compat_template_arg_unnamed_type, - SR.getBegin()) || - !Diags.isIgnored(diag::warn_cxx98_compat_template_arg_local_type, - SR.getBegin()); - else - NeedsCheck = Arg->hasUnnamedOrLocalType(); - - if (NeedsCheck) { + if (LangOpts.CPlusPlus11 || Arg->hasUnnamedOrLocalType()) { UnnamedLocalNoLinkageFinder Finder(*this, SR); (void)Finder.Visit(Context.getCanonicalType(Arg)); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp index 0bc85a2f2635..7f1fd91c46f0 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -288,19 +288,22 @@ checkDeducedTemplateArguments(ASTContext &Context, X.pack_size() != Y.pack_size()) return DeducedTemplateArgument(); + llvm::SmallVector NewPack; for (TemplateArgument::pack_iterator XA = X.pack_begin(), XAEnd = X.pack_end(), YA = Y.pack_begin(); XA != XAEnd; ++XA, ++YA) { - // FIXME: Do we need to merge the results together here? - if (checkDeducedTemplateArguments(Context, - DeducedTemplateArgument(*XA, X.wasDeducedFromArrayBound()), - DeducedTemplateArgument(*YA, Y.wasDeducedFromArrayBound())) - .isNull()) + TemplateArgument Merged = checkDeducedTemplateArguments( + Context, DeducedTemplateArgument(*XA, X.wasDeducedFromArrayBound()), + DeducedTemplateArgument(*YA, Y.wasDeducedFromArrayBound())); + if (Merged.isNull()) return DeducedTemplateArgument(); + NewPack.push_back(Merged); } - return X; + return DeducedTemplateArgument( + TemplateArgument::CreatePackCopy(Context, NewPack), + X.wasDeducedFromArrayBound() && Y.wasDeducedFromArrayBound()); } llvm_unreachable("Invalid TemplateArgument Kind!"); @@ -672,17 +675,20 @@ public: // for that pack, then clear out the deduced argument. for (auto &Pack : Packs) { DeducedTemplateArgument &DeducedArg = Deduced[Pack.Index]; - if (!DeducedArg.isNull()) { + if (!Pack.New.empty() || !DeducedArg.isNull()) { + while (Pack.New.size() < PackElements) + Pack.New.push_back(DeducedTemplateArgument()); Pack.New.push_back(DeducedArg); DeducedArg = DeducedTemplateArgument(); } } + ++PackElements; } /// \brief Finish template argument deduction for a set of argument packs, /// producing the argument packs and checking for consistency with prior /// deductions. - Sema::TemplateDeductionResult finish(bool HasAnyArguments) { + Sema::TemplateDeductionResult finish() { // Build argument packs for each of the parameter packs expanded by this // pack expansion. for (auto &Pack : Packs) { @@ -691,7 +697,7 @@ public: // Build or find a new value for this pack. 
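// Why the element-wise merge in checkDeducedTemplateArguments (above) matters:
// the same pack may be deduced from more than one position, each occurrence
// contributing partial information. A hypothetical signature where that
// happens:
//
//   template <typename... T> void g(std::tuple<T...> t, T... rest);
//
// Here T is deduced both from the tuple argument and from the trailing
// arguments, and the two deductions are now combined element by element (also
// merging the deduced-from-array-bound flags) instead of returning the first
// pack wholesale, which is what the removed FIXME asked for.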
DeducedTemplateArgument NewPack; - if (HasAnyArguments && Pack.New.empty()) { + if (PackElements && Pack.New.empty()) { if (Pack.DeferredDeduction.isNull()) { // We were not able to deduce anything for this parameter pack // (because it only appeared in non-deduced contexts), so just @@ -758,6 +764,7 @@ private: TemplateParameterList *TemplateParams; SmallVectorImpl &Deduced; TemplateDeductionInfo &Info; + unsigned PackElements = 0; SmallVector Packs; }; @@ -861,10 +868,7 @@ DeduceTemplateArguments(Sema &S, QualType Pattern = Expansion->getPattern(); PackDeductionScope PackScope(S, TemplateParams, Deduced, Info, Pattern); - bool HasAnyArguments = false; for (; ArgIdx < NumArgs; ++ArgIdx) { - HasAnyArguments = true; - // Deduce template arguments from the pattern. if (Sema::TemplateDeductionResult Result = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, Pattern, @@ -877,7 +881,7 @@ DeduceTemplateArguments(Sema &S, // Build argument packs for each of the parameter packs expanded by this // pack expansion. - if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; } @@ -1935,10 +1939,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Keep track of the deduced template arguments for each parameter pack // expanded by this pack expansion (the outer index) and for each // template argument (the inner SmallVectors). - bool HasAnyArguments = false; for (; hasTemplateArgumentForDeduction(Args, ArgIdx); ++ArgIdx) { - HasAnyArguments = true; - // Deduce template arguments from the pattern. if (Sema::TemplateDeductionResult Result = DeduceTemplateArguments(S, TemplateParams, Pattern, Args[ArgIdx], @@ -1950,7 +1951,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Build argument packs for each of the parameter packs expanded by this // pack expansion. - if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; } @@ -2145,6 +2146,16 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param, InnerArg.setDeducedFromArrayBound(Arg.wasDeducedFromArrayBound()); assert(InnerArg.getKind() != TemplateArgument::Pack && "deduced nested pack"); + if (P.isNull()) { + // We deduced arguments for some elements of this pack, but not for + // all of them. This happens if we get a conditionally-non-deduced + // context in a pack expansion (such as an overload set in one of the + // arguments). + S.Diag(Param->getLocation(), + diag::err_template_arg_deduced_incomplete_pack) + << Arg << Param; + return true; + } if (ConvertArg(InnerArg, PackedArgsBuilder.size())) return true; @@ -3192,67 +3203,59 @@ static Sema::TemplateDeductionResult DeduceTemplateArgumentByListElement( /// \brief Attempt template argument deduction from an initializer list /// deemed to be an argument in a function call. 
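// The language rule implemented below, seen from user code: a braced list can
// drive deduction when the parameter is std::initializer_list<P0> or an array
// P0[N] with N a non-type template parameter. An illustrative translation
// unit (hypothetical names):

#include <initializer_list>

template <typename T> void takes_il(std::initializer_list<T>) {}
template <typename T, int N> void takes_array(T (&&)[N]) {}

void cwg1591_example() {
  takes_il({1, 2, 3});    // deduces T = int from the elements
  takes_array({1, 2, 3}); // deduces T = int and N = 3 (N from the list length)
}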
-static bool +static Sema::TemplateDeductionResult DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams, QualType AdjustedParamType, InitListExpr *ILE, TemplateDeductionInfo &Info, SmallVectorImpl &Deduced, - unsigned TDF, Sema::TemplateDeductionResult &Result) { - - // [temp.deduct.call] p1 (post CWG-1591) - // If removing references and cv-qualifiers from P gives - // std::initializer_list or P0[N] for some P0 and N and the argument is a - // non-empty initializer list (8.5.4), then deduction is performed instead for - // each element of the initializer list, taking P0 as a function template - // parameter type and the initializer element as its argument, and in the - // P0[N] case, if N is a non-type template parameter, N is deduced from the - // length of the initializer list. Otherwise, an initializer list argument - // causes the parameter to be considered a non-deduced context - - const bool IsConstSizedArray = AdjustedParamType->isConstantArrayType(); - - const bool IsDependentSizedArray = - !IsConstSizedArray && AdjustedParamType->isDependentSizedArrayType(); - - QualType ElTy; // The element type of the std::initializer_list or the array. - - const bool IsSTDList = !IsConstSizedArray && !IsDependentSizedArray && - S.isStdInitializerList(AdjustedParamType, &ElTy); - - if (!IsConstSizedArray && !IsDependentSizedArray && !IsSTDList) - return false; - - Result = Sema::TDK_Success; - // If we are not deducing against the 'T' in a std::initializer_list then - // deduce against the 'T' in T[N]. - if (ElTy.isNull()) { - assert(!IsSTDList); - ElTy = S.Context.getAsArrayType(AdjustedParamType)->getElementType(); + unsigned TDF) { + // C++ [temp.deduct.call]p1: (CWG 1591) + // If removing references and cv-qualifiers from P gives + // std::initializer_list or P0[N] for some P0 and N and the argument is + // a non-empty initializer list, then deduction is performed instead for + // each element of the initializer list, taking P0 as a function template + // parameter type and the initializer element as its argument + // + // FIXME: Remove references and cv-qualifiers here? Consider + // std::initializer_list&&> + QualType ElTy; + auto *ArrTy = S.Context.getAsArrayType(AdjustedParamType); + if (ArrTy) + ElTy = ArrTy->getElementType(); + else if (!S.isStdInitializerList(AdjustedParamType, &ElTy)) { + // Otherwise, an initializer list argument causes the parameter to be + // considered a non-deduced context + return Sema::TDK_Success; } + // Deduction only needs to be done for dependent types. if (ElTy->isDependentType()) { for (Expr *E : ILE->inits()) { - if ((Result = DeduceTemplateArgumentByListElement(S, TemplateParams, ElTy, - E, Info, Deduced, TDF))) - return true; + if (auto Result = DeduceTemplateArgumentByListElement( + S, TemplateParams, ElTy, E, Info, Deduced, TDF)) + return Result; } } - if (IsDependentSizedArray) { - const DependentSizedArrayType *ArrTy = - S.Context.getAsDependentSizedArrayType(AdjustedParamType); + + // in the P0[N] case, if N is a non-type template parameter, N is deduced + // from the length of the initializer list. + // FIXME: We're not supposed to get here if N would be deduced as 0. + if (auto *DependentArrTy = dyn_cast_or_null(ArrTy)) { // Determine the array bound is something we can deduce. 
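A sketch of the cases the rewritten DeduceFromInitializerList distinguishes, per CWG 1591 (example code, not from the patch):

#include <initializer_list>

template <typename T> void f(std::initializer_list<T>) {} // deduce T
template <typename T, int N> void g(T (&&)[N]) {}         // deduce T and N

void test() {
  f({1, 2, 3}); // T = int
  g({1, 2, 3}); // T = int, N = 3, deduced from the length of the list
}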
if (NonTypeTemplateParmDecl *NTTP = - getDeducedParameterFromExpr(Info, ArrTy->getSizeExpr())) { + getDeducedParameterFromExpr(Info, DependentArrTy->getSizeExpr())) { // We can perform template argument deduction for the given non-type // template parameter. llvm::APInt Size(S.Context.getIntWidth(NTTP->getType()), ILE->getNumInits()); - Result = DeduceNonTypeTemplateArgument( - S, TemplateParams, NTTP, llvm::APSInt(Size), NTTP->getType(), - /*ArrayBound=*/true, Info, Deduced); + if (auto Result = DeduceNonTypeTemplateArgument( + S, TemplateParams, NTTP, llvm::APSInt(Size), NTTP->getType(), + /*ArrayBound=*/true, Info, Deduced)) + return Result; } } - return true; + + return Sema::TDK_Success; } /// \brief Perform template argument deduction by matching a parameter type @@ -3268,15 +3271,10 @@ DeduceTemplateArgumentByListElement(Sema &S, unsigned TDF) { // Handle the case where an init list contains another init list as the // element. - if (InitListExpr *ILE = dyn_cast(Arg)) { - Sema::TemplateDeductionResult Result; - if (!DeduceFromInitializerList(S, TemplateParams, - ParamType.getNonReferenceType(), ILE, Info, - Deduced, TDF, Result)) - return Sema::TDK_Success; // Just ignore this expression. - - return Result; - } + if (InitListExpr *ILE = dyn_cast(Arg)) + return DeduceFromInitializerList(S, TemplateParams, + ParamType.getNonReferenceType(), ILE, Info, + Deduced, TDF); // For all other cases, just match by type. QualType ArgType = Arg->getType(); @@ -3363,58 +3361,51 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( ParamTypes.push_back(Function->getParamDecl(I)->getType()); } + SmallVector OriginalCallArgs; + + // Deduce an argument of type ParamType from an expression with index ArgIdx. + auto DeduceCallArgument = [&](QualType ParamType, unsigned ArgIdx) { + Expr *Arg = Args[ArgIdx]; + QualType ArgType = Arg->getType(); + QualType OrigParamType = ParamType; + + unsigned TDF = 0; + if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, + ParamType, ArgType, Arg, + TDF)) + return Sema::TDK_Success; + + // If we have nothing to deduce, we're done. + if (!hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) + return Sema::TDK_Success; + + // If the argument is an initializer list ... + if (InitListExpr *ILE = dyn_cast(Arg)) + return DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, + Info, Deduced, TDF); + + // Keep track of the argument type and corresponding parameter index, + // so we can check for compatibility between the deduced A and A. + OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx, ArgType)); + + return DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, ParamType, + ArgType, Info, Deduced, TDF); + }; + // Deduce template arguments from the function parameters. Deduced.resize(TemplateParams->size()); - unsigned ArgIdx = 0; - SmallVector OriginalCallArgs; - for (unsigned ParamIdx = 0, NumParamTypes = ParamTypes.size(); + for (unsigned ParamIdx = 0, NumParamTypes = ParamTypes.size(), ArgIdx = 0; ParamIdx != NumParamTypes; ++ParamIdx) { - QualType OrigParamType = ParamTypes[ParamIdx]; - QualType ParamType = OrigParamType; + QualType ParamType = ParamTypes[ParamIdx]; - const PackExpansionType *ParamExpansion - = dyn_cast(ParamType); + const PackExpansionType *ParamExpansion = + dyn_cast(ParamType); if (!ParamExpansion) { // Simple case: matching a function parameter to a function argument. 
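The DeduceCallArgument lambda above folds two previously duplicated loop bodies into one: the plain-parameter branch and the pack-expansion branch below both deduce through it. An illustrative call, with invented names:

template <typename T, typename... U>
void h(T first, U... rest) {}

void test() {
  h(1, 2.0, "x"); // T = int via the non-pack branch; U = <double, const char *>
                  // via the pack branch, one element per remaining argument
}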
if (ArgIdx >= CheckArgs) break; - Expr *Arg = Args[ArgIdx++]; - QualType ArgType = Arg->getType(); - - unsigned TDF = 0; - if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, - ParamType, ArgType, Arg, - TDF)) - continue; - - // If we have nothing to deduce, we're done. - if (!hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) - continue; - - // If the argument is an initializer list ... - if (InitListExpr *ILE = dyn_cast(Arg)) { - TemplateDeductionResult Result; - // Removing references was already done. - if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, - Info, Deduced, TDF, Result)) - continue; - - if (Result) - return Result; - // Don't track the argument type, since an initializer list has none. - continue; - } - - // Keep track of the argument type and corresponding parameter index, - // so we can check for compatibility between the deduced A and A. - OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx-1, - ArgType)); - - if (TemplateDeductionResult Result - = DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, - ParamType, ArgType, - Info, Deduced, TDF)) + if (auto Result = DeduceCallArgument(ParamType, ArgIdx++)) return Result; continue; @@ -3429,6 +3420,9 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( // the function parameter pack. For a function parameter pack that does // not occur at the end of the parameter-declaration-list, the type of // the parameter pack is a non-deduced context. + // FIXME: This does not say that subsequent parameters are also non-deduced. + // See also DR1388 / DR1399, which effectively says we should keep deducing + // after the pack. if (ParamIdx + 1 < NumParamTypes) break; @@ -3436,57 +3430,13 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( PackDeductionScope PackScope(*this, TemplateParams, Deduced, Info, ParamPattern); - bool HasAnyArguments = false; - for (; ArgIdx < Args.size(); ++ArgIdx) { - HasAnyArguments = true; - - QualType OrigParamType = ParamPattern; - ParamType = OrigParamType; - Expr *Arg = Args[ArgIdx]; - QualType ArgType = Arg->getType(); - - unsigned TDF = 0; - if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, - ParamType, ArgType, Arg, - TDF)) { - // We can't actually perform any deduction for this argument, so stop - // deduction at this point. - ++ArgIdx; - break; - } - - // As above, initializer lists need special handling. - if (InitListExpr *ILE = dyn_cast(Arg)) { - TemplateDeductionResult Result; - if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, - Info, Deduced, TDF, Result)) { - ++ArgIdx; - break; - } - - if (Result) - return Result; - } else { - - // Keep track of the argument type and corresponding argument index, - // so we can check for compatibility between the deduced A and A. - if (hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) - OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx, - ArgType)); - - if (TemplateDeductionResult Result - = DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, - ParamType, ArgType, Info, - Deduced, TDF)) - return Result; - } - - PackScope.nextPackElement(); - } + for (; ArgIdx < Args.size(); PackScope.nextPackElement(), ++ArgIdx) + if (auto Result = DeduceCallArgument(ParamPattern, ArgIdx)) + return Result; // Build argument packs for each of the parameter packs expanded by this // pack expansion. 
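On the DR1388/DR1399 FIXME above: a pack that is not the last parameter is currently a non-deduced context, and deduction stops there instead of continuing with the trailing parameters. A hedged illustration:

template <typename... T>
void k(T... ts, int last) {}

void test() {
  k(1);       // OK: T is deduced as the empty pack and 1 binds to `last`
  // k(1, 2); // error: T cannot be deduced here, so 2 has no parameter to match
}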
- if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; // After we've matching against a parameter pack, we're done. diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index 3ab6019f0ec3..66892936e573 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -7766,6 +7766,20 @@ TreeTransform::TransformOMPTargetTeamsDistributeParallelForDirective( return Res; } +template +StmtResult TreeTransform:: + TransformOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr, + D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + + //===----------------------------------------------------------------------===// // OpenMP clause transformation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index 5607f764a9c3..19fac55664ae 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2860,6 +2860,11 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective( VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); +} + //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// @@ -3638,6 +3643,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE: { + auto NumClauses = Record[ASTStmtReader::NumStmtFields]; + auto CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty( + Context, NumClauses, CollapsedNum, Empty); + break; + } + case EXPR_CXX_OPERATOR_CALL: S = new (Context) CXXOperatorCallExpr(Context, Empty); break; diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp index 3993be146edf..162b2bd25260 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2556,6 +2556,13 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective( Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } +void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); + Code = serialization:: + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE; +} + //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
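For reference, the combined OpenMP 4.5 construct whose AST node the transform and (de)serialization hooks above handle looks like this in user code (a minimal sketch):

void saxpy(int n, float a, float *x, float *y) {
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}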
b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 5b2119aeda27..707168b4de0a 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -869,6 +869,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetTeamsDirectiveClass: case Stmt::OMPTargetTeamsDistributeDirectiveClass: case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: + case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: llvm_unreachable("Stmt should not be in analyzer evaluation loop"); case Stmt::ObjCSubscriptRefExprClass: diff --git a/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp b/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp new file mode 100644 index 000000000000..3bbc2b901e38 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp @@ -0,0 +1,46 @@ +//===--- Diagnostic.cpp - Framework for clang diagnostics tools ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements classes to support/store diagnostics refactoring. +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Core/Diagnostic.h" +#include "clang/Basic/SourceManager.h" + +namespace clang { +namespace tooling { + +DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message) + : Message(Message), FileOffset(0) {} + +DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message, + const SourceManager &Sources, + SourceLocation Loc) + : Message(Message) { + assert(Loc.isValid() && Loc.isFileID()); + FilePath = Sources.getFilename(Loc); + FileOffset = Sources.getFileOffset(Loc); +} + +Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, + Diagnostic::Level DiagLevel, StringRef BuildDirectory) + : DiagnosticName(DiagnosticName), DiagLevel(DiagLevel), + BuildDirectory(BuildDirectory) {} + +Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, + DiagnosticMessage &Message, + llvm::StringMap &Fix, + SmallVector &Notes, + Level DiagLevel, llvm::StringRef BuildDirectory) + : DiagnosticName(DiagnosticName), Message(Message), Fix(Fix), Notes(Notes), + DiagLevel(DiagLevel), BuildDirectory(BuildDirectory) {} + +} // end namespace tooling +} // end namespace clang diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp index 56d5a3651143..d5c52a69be69 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.cpp +++ b/contrib/llvm/tools/lld/COFF/PDB.cpp @@ -13,6 +13,7 @@ #include "Error.h" #include "SymbolTable.h" #include "Symbols.h" +#include "llvm/DebugInfo/CodeView/CVDebugRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDumper.h" #include "llvm/DebugInfo/CodeView/TypeDumper.h" #include "llvm/DebugInfo/MSF/ByteStream.h" @@ -131,7 +132,8 @@ static void addTypeInfo(SymbolTable *Symtab, // Creates a PDB file. void coff::createPDB(StringRef Path, SymbolTable *Symtab, - ArrayRef SectionTable) { + ArrayRef SectionTable, + const llvm::codeview::DebugInfo *DI) { if (Config->DumpPdb) dumpCodeView(Symtab); @@ -146,11 +148,9 @@ void coff::createPDB(StringRef Path, SymbolTable *Symtab, // Add an Info stream. auto &InfoBuilder = Builder.getInfoBuilder(); - InfoBuilder.setAge(1); - - // Should be a random number, 0 for now. 
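A hypothetical use of the Diagnostic classes added in Tooling/Core/Diagnostic.cpp above. The check name, build directory, and the SM/Loc parameters are invented for the sketch, and the Notes element count follows the header's SmallVector type:

#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Core/Diagnostic.h"

clang::tooling::Diagnostic
makeDiag(const clang::SourceManager &SM, clang::SourceLocation Loc) {
  using namespace clang::tooling;
  DiagnosticMessage Msg("unused variable 'x'", SM, Loc); // Loc must be a valid file location
  llvm::StringMap<Replacements> NoFix;                   // no fix-its attached
  llvm::SmallVector<DiagnosticMessage, 1> NoNotes;       // no attached notes
  return Diagnostic("my-check", Msg, NoFix, NoNotes, Diagnostic::Warning,
                    "/path/to/build");
}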
- InfoBuilder.setGuid({}); - + InfoBuilder.setAge(DI->PDB70.Age); + InfoBuilder.setGuid( + *reinterpret_cast(&DI->PDB70.Signature)); // Should be the current time, but set 0 for reproducibilty. InfoBuilder.setSignature(0); InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); diff --git a/contrib/llvm/tools/lld/COFF/PDB.h b/contrib/llvm/tools/lld/COFF/PDB.h index 091e90fa1ef1..c9c37914299a 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.h +++ b/contrib/llvm/tools/lld/COFF/PDB.h @@ -13,12 +13,19 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +namespace llvm { +namespace codeview { +union DebugInfo; +} +} + namespace lld { namespace coff { class SymbolTable; void createPDB(llvm::StringRef Path, SymbolTable *Symtab, - llvm::ArrayRef SectionTable); + llvm::ArrayRef SectionTable, + const llvm::codeview::DebugInfo *DI); } } diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp index 3e69aebbb424..71217ebeb60a 100644 --- a/contrib/llvm/tools/lld/COFF/Writer.cpp +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -304,7 +304,7 @@ void Writer::run() { writeBuildId(); if (!Config->PDBPath.empty()) - createPDB(Config->PDBPath, Symtab, SectionTable); + createPDB(Config->PDBPath, Symtab, SectionTable, BuildId->DI); if (auto EC = Buffer->commit()) fatal(EC, "failed to write the output file"); diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp index 2a8659921463..f4128c5096cb 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -461,7 +461,7 @@ SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) return new (BAlloc) - Undefined(Name, /*IsLocal=*/true, StOther, Type, this); + Undefined(Name, /*IsLocal=*/true, StOther, Type, this); return new (BAlloc) DefinedRegular(Name, /*IsLocal=*/true, StOther, Type, Value, Size, Sec, this); diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp index a3d6a141a202..b342b6195f1d 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.cpp +++ b/contrib/llvm/tools/lld/ELF/LTO.cpp @@ -96,12 +96,12 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} BitcodeCompiler::~BitcodeCompiler() = default; -static void undefine(Symbol *S) { - replaceBody(S, S->body()->getName(), /*IsLocal=*/false, - STV_DEFAULT, S->body()->Type, nullptr); +template static void undefine(Symbol *S) { + replaceBody>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } -void BitcodeCompiler::add(BitcodeFile &F) { +template void BitcodeCompiler::add(BitcodeFile &F) { lto::InputFile &Obj = *F.Obj; unsigned SymNum = 0; std::vector Syms = F.getSymbols(); @@ -126,7 +126,7 @@ void BitcodeCompiler::add(BitcodeFile &F) { R.VisibleToRegularObj = Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); if (R.Prevailing) - undefine(Sym); + undefine(Sym); } checkError(LTOObj->add(std::move(F.Obj), Resols)); } @@ -157,3 +157,8 @@ std::vector BitcodeCompiler::compile() { } return Ret; } + +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h index b3d734f2d381..3cb763650e1c 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.h 
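The LTO change above moves add() and undefine() behind a template parameter while keeping their definitions in the .cpp file, relying on explicit instantiation for the four ELF variants. The pattern in miniature (toy names, not lld's API):

struct BitcodeFileStub {}; // stand-in for lld's BitcodeFile

struct Compiler {
  template <class ELFT> void add(BitcodeFileStub &F); // declared in the header
};

// The definition stays in the .cpp file...
template <class ELFT> void Compiler::add(BitcodeFileStub &F) {}

struct ELF32LE {};
struct ELF64LE {};

// ...so every instantiation that other files may use is spelled out here.
template void Compiler::add<ELF32LE>(BitcodeFileStub &);
template void Compiler::add<ELF64LE>(BitcodeFileStub &);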
+++ b/contrib/llvm/tools/lld/ELF/LTO.h @@ -43,7 +43,7 @@ public: BitcodeCompiler(); ~BitcodeCompiler(); - void add(BitcodeFile &F); + template void add(BitcodeFile &F); std::vector compile(); private: diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp index 79097e176e68..f08fa6229c1a 100644 --- a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -115,7 +115,7 @@ template void SymbolTable::addCombinedLTOObject() { // Compile bitcode files and replace bitcode symbols. LTO.reset(new BitcodeCompiler); for (BitcodeFile *F : BitcodeFiles) - LTO->add(*F); + LTO->add(*F); for (InputFile *File : LTO->compile()) { ObjectFile *Obj = cast>(File); @@ -256,7 +256,7 @@ Symbol *SymbolTable::addUndefined(StringRef Name, bool IsLocal, insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File); if (WasInserted) { S->Binding = Binding; - replaceBody(S, Name, IsLocal, StOther, Type, File); + replaceBody>(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { @@ -432,7 +432,7 @@ void SymbolTable::addShared(SharedFile *F, StringRef Name, if (S->VersionId == VER_NDX_LOCAL) S->VersionId = VER_NDX_GLOBAL; } - if (WasInserted || isa(S->body())) { + if (WasInserted || isa>(S->body())) { replaceBody>(S, F, Name, Sym, Verdef); if (!S->isWeak()) F->IsUsed = true; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp index a2133f411c20..f168d37bdf0a 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.cpp +++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp @@ -173,6 +173,8 @@ template typename ELFT::uint SymbolBody::getThunkVA() const { return DR->ThunkData->getVA(); if (const auto *S = dyn_cast>(this)) return S->ThunkData->getVA(); + if (const auto *S = dyn_cast>(this)) + return S->ThunkData->getVA(); fatal("getThunkVA() not supported for Symbol class\n"); } @@ -232,8 +234,9 @@ template bool DefinedRegular::isMipsPIC() const { (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); } -Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, - uint8_t Type, InputFile *File) +template +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } @@ -354,6 +357,11 @@ template uint32_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; +template class elf::Undefined; +template class elf::Undefined; +template class elf::Undefined; +template class elf::Undefined; + template class elf::DefinedRegular; template class elf::DefinedRegular; template class elf::DefinedRegular; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h b/contrib/llvm/tools/lld/ELF/Symbols.h index c95241a5293e..cbf8fa81a138 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.h +++ b/contrib/llvm/tools/lld/ELF/Symbols.h @@ -236,7 +236,7 @@ public: const OutputSectionBase *Section; }; -class Undefined : public SymbolBody { +template class Undefined : public SymbolBody { public: Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, InputFile *F); @@ -245,6 +245,12 @@ public: return S->kind() == UndefinedKind; } + // If non-null the symbol has a Thunk that may be used as an alternative + // destination for callers of this Symbol. 
When linking a DSO undefined + // symbols are implicitly imported, the symbol lookup will be performed by + // the dynamic loader. A call to an undefined symbol will be given a PLT + // entry and on ARM this may need a Thunk if the caller is in Thumb state. + Thunk *ThunkData = nullptr; InputFile *file() { return this->File; } }; @@ -416,7 +422,8 @@ struct Symbol { // ELFT, and we verify this with the static_asserts in replaceBody. llvm::AlignedCharArrayUnion< DefinedCommon, DefinedRegular, DefinedSynthetic, - Undefined, SharedSymbol, LazyArchive, LazyObject> + Undefined, SharedSymbol, + LazyArchive, LazyObject> Body; SymbolBody *body() { return reinterpret_cast(Body.buffer); } diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp index edae7c65c1b4..d82e654b9c4c 100644 --- a/contrib/llvm/tools/lld/ELF/Target.cpp +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -1730,8 +1730,11 @@ void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, const SymbolBody &S) const { - // If S is an undefined weak symbol we don't need a Thunk - if (S.isUndefined()) + // If S is an undefined weak symbol in an executable we don't need a Thunk. + // In a DSO calls to undefined symbols, including weak ones get PLT entries + // which may need a thunk. + if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() + && !Config->Shared) return Expr; // A state change from ARM to Thumb and vice versa must go through an // interworking thunk if the relocation type is not R_ARM_CALL or diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp index 34b630ac2510..397a0ee66319 100644 --- a/contrib/llvm/tools/lld/ELF/Thunks.cpp +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -226,6 +226,8 @@ static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection &IS) { Sym->ThunkData = T; else if (auto *Sym = dyn_cast>(&S)) Sym->ThunkData = T; + else if (auto *Sym = dyn_cast>(&S)) + Sym->ThunkData = T; else fatal("symbol not DefinedRegular or Shared"); } diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp index e056384fbd44..154de8cf6d18 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.cpp +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -625,15 +625,12 @@ void PhdrEntry::add(OutputSectionBase *Sec) { } template -static Symbol *addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, - typename ELFT::uint Val, - uint8_t StOther = STV_HIDDEN) { - SymbolBody *S = Symtab::X->find(Name); - if (!S) - return nullptr; - if (!S->isUndefined() && !S->isShared()) - return S->symbol(); - return Symtab::X->addSynthetic(Name, Sec, Val, StOther); +static void addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, + typename ELFT::uint Val, + uint8_t StOther = STV_HIDDEN) { + if (SymbolBody *S = Symtab::X->find(Name)) + if (S->isUndefined() || S->isShared()) + Symtab::X->addSynthetic(Name, Sec, Val, StOther); } template @@ -1447,8 +1444,13 @@ template void Writer::setPhdrs() { } if (P.p_type == PT_LOAD) P.p_align = Config->MaxPageSize; - else if (P.p_type == PT_GNU_RELRO) + else if (P.p_type == PT_GNU_RELRO) { P.p_align = 1; + // The glibc dynamic loader rounds the size down, so we need to round up + // to protect the last page. This is a no-op on FreeBSD which always + // rounds up. + P.p_memsz = alignTo(P.p_memsz, Config->MaxPageSize); + } // The TLS pointer goes after PT_TLS. 
At least glibc will align it, // so round up the size to make sure the offsets are correct. diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index 82473fba065b..ae907ac8dfbb 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -2854,6 +2854,11 @@ bool RenderScriptRuntime::LoadModule(const lldb::ModuleSP &module_sp) { module_desc.reset(new RSModuleDescriptor(module_sp)); if (module_desc->ParseRSInfo()) { m_rsmodules.push_back(module_desc); + module_desc->WarnIfVersionMismatch(GetProcess() + ->GetTarget() + .GetDebugger() + .GetAsyncOutputStream() + .get()); module_loaded = true; } if (module_loaded) { @@ -2923,6 +2928,25 @@ void RenderScriptRuntime::Update() { } } +void RSModuleDescriptor::WarnIfVersionMismatch(lldb_private::Stream *s) const { + if (!s) + return; + + if (m_slang_version.empty() || m_bcc_version.empty()) { + s->PutCString("WARNING: Unknown bcc or slang (llvm-rs-cc) version; debug " + "experience may be unreliable"); + s->EOL(); + } else if (m_slang_version != m_bcc_version) { + s->Printf("WARNING: The debug info emitted by the slang frontend " + "(llvm-rs-cc) used to build this module (%s) does not match the " + "version of bcc used to generate the debug information (%s). " + "This is an unsupported configuration and may result in a poor " + "debugging experience; proceed with caution", + m_slang_version.c_str(), m_bcc_version.c_str()); + s->EOL(); + } +} + bool RSModuleDescriptor::ParsePragmaCount(llvm::StringRef *lines, size_t n_lines) { // Skip the pragma prototype line @@ -2990,6 +3014,22 @@ bool RSModuleDescriptor::ParseExportReduceCount(llvm::StringRef *lines, return true; } +bool RSModuleDescriptor::ParseVersionInfo(llvm::StringRef *lines, + size_t n_lines) { + // Skip the versionInfo line + ++lines; + for (; n_lines--; ++lines) { + // We're only interested in bcc and slang versions, and ignore all other + // versionInfo lines + const auto kv_pair = lines->split(" - "); + if (kv_pair.first == "slang") + m_slang_version = kv_pair.second.str(); + else if (kv_pair.first == "bcc") + m_bcc_version = kv_pair.second.str(); + } + return true; +} + bool RSModuleDescriptor::ParseExportForeachCount(llvm::StringRef *lines, size_t n_lines) { // Skip the exportForeachCount line @@ -3054,7 +3094,8 @@ bool RSModuleDescriptor::ParseRSInfo() { eExportReduce, ePragma, eBuildChecksum, - eObjectSlot + eObjectSlot, + eVersionInfo, }; const auto rs_info_handler = [](llvm::StringRef name) -> int { @@ -3070,6 +3111,7 @@ bool RSModuleDescriptor::ParseRSInfo() { // script .Case("pragmaCount", ePragma) .Case("objectSlotCount", eObjectSlot) + .Case("versionInfo", eVersionInfo) .Default(-1); }; @@ -3108,6 +3150,9 @@ bool RSModuleDescriptor::ParseRSInfo() { case ePragma: success = ParsePragmaCount(line, n_lines); break; + case eVersionInfo: + success = ParseVersionInfo(line, n_lines); + break; default: { if (log) log->Printf("%s - skipping .rs.info field '%s'", __FUNCTION__, diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 
a1211a2814b7..5b2bb57ac8c8 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -203,6 +203,11 @@ struct RSReductionDescriptor { }; class RSModuleDescriptor { + std::string m_slang_version; + std::string m_bcc_version; + + bool ParseVersionInfo(llvm::StringRef *, size_t n_lines); + bool ParseExportForeachCount(llvm::StringRef *, size_t n_lines); bool ParseExportVarCount(llvm::StringRef *, size_t n_lines); @@ -222,6 +227,8 @@ public: void Dump(Stream &strm) const; + void WarnIfVersionMismatch(Stream *s) const; + const lldb::ModuleSP m_module; std::vector m_kernels; std::vector m_globals; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp index 2adc0d3f6c89..0143146b79ab 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp @@ -202,7 +202,6 @@ ELFLinuxPrStatus::ELFLinuxPrStatus() { Error ELFLinuxPrStatus::Parse(DataExtractor &data, ArchSpec &arch) { Error error; - ByteOrder byteorder = data.GetByteOrder(); if (GetSize(arch) > data.GetByteSize()) { error.SetErrorStringWithFormat( "NT_PRSTATUS size should be %zu, but the remaining bytes are: %" PRIu64, @@ -210,50 +209,36 @@ Error ELFLinuxPrStatus::Parse(DataExtractor &data, ArchSpec &arch) { return error; } - switch (arch.GetCore()) { -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrStatus), byteorder, this); - break; - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - offset_t offset = 0; - si_signo = data.GetU32(&offset); - si_code = data.GetU32(&offset); - si_errno = data.GetU32(&offset); + // Read field by field to correctly account for endianess + // of both the core dump and the platform running lldb. 
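On the comment above: reading each field through the extractor byte-swaps according to the core file's recorded byte order, which a whole-struct ExtractBytes/memcpy cannot do when the core and the host disagree. The idea reduced to a self-contained sketch (invented names; little-endian input for brevity):

#include <cstdint>

// Assemble a u32 from little-endian bytes, portably on any host.
uint32_t readU32LE(const uint8_t *buf) {
  return uint32_t(buf[0]) | uint32_t(buf[1]) << 8 |
         uint32_t(buf[2]) << 16 | uint32_t(buf[3]) << 24;
}

struct PrStatusHead { uint32_t si_signo, si_code, si_errno; };

PrStatusHead parseHead(const uint8_t *buf) {
  PrStatusHead h;
  h.si_signo = readU32LE(buf + 0); // field by field, as in the patch,
  h.si_code  = readU32LE(buf + 4); // rather than copying the whole struct
  h.si_errno = readU32LE(buf + 8);
  return h;
}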
+ offset_t offset = 0; + si_signo = data.GetU32(&offset); + si_code = data.GetU32(&offset); + si_errno = data.GetU32(&offset); - pr_cursig = data.GetU16(&offset); - offset += 2; // pad + pr_cursig = data.GetU16(&offset); + offset += 2; // pad - pr_sigpend = data.GetU32(&offset); - pr_sighold = data.GetU32(&offset); + pr_sigpend = data.GetPointer(&offset); + pr_sighold = data.GetPointer(&offset); - pr_pid = data.GetU32(&offset); - pr_ppid = data.GetU32(&offset); - pr_pgrp = data.GetU32(&offset); - pr_sid = data.GetU32(&offset); + pr_pid = data.GetU32(&offset); + pr_ppid = data.GetU32(&offset); + pr_pgrp = data.GetU32(&offset); + pr_sid = data.GetU32(&offset); - pr_utime.tv_sec = data.GetU32(&offset); - pr_utime.tv_usec = data.GetU32(&offset); + pr_utime.tv_sec = data.GetPointer(&offset); + pr_utime.tv_usec = data.GetPointer(&offset); - pr_stime.tv_sec = data.GetU32(&offset); - pr_stime.tv_usec = data.GetU32(&offset); + pr_stime.tv_sec = data.GetPointer(&offset); + pr_stime.tv_usec = data.GetPointer(&offset); - pr_cutime.tv_sec = data.GetU32(&offset); - pr_cutime.tv_usec = data.GetU32(&offset); + pr_cutime.tv_sec = data.GetPointer(&offset); + pr_cutime.tv_usec = data.GetPointer(&offset); - pr_cstime.tv_sec = data.GetU32(&offset); - pr_cstime.tv_usec = data.GetU32(&offset); + pr_cstime.tv_sec = data.GetPointer(&offset); + pr_cstime.tv_usec = data.GetPointer(&offset); - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxPrStatus::%s Unknown architecture", - __FUNCTION__); - break; - } return error; } @@ -274,49 +259,37 @@ Error ELFLinuxPrPsInfo::Parse(DataExtractor &data, ArchSpec &arch) { GetSize(arch), data.GetByteSize()); return error; } + size_t size = 0; + offset_t offset = 0; - switch (arch.GetCore()) { -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrPsInfo), byteorder, this); - break; - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - size_t size = 0; - offset_t offset = 0; - - pr_state = data.GetU8(&offset); - pr_sname = data.GetU8(&offset); - pr_zomb = data.GetU8(&offset); - pr_nice = data.GetU8(&offset); - - pr_flag = data.GetU32(&offset); - pr_uid = data.GetU16(&offset); - pr_gid = data.GetU16(&offset); - - pr_pid = data.GetU32(&offset); - pr_ppid = data.GetU32(&offset); - pr_pgrp = data.GetU32(&offset); - pr_sid = data.GetU32(&offset); - - size = 16; - data.ExtractBytes(offset, size, byteorder, pr_fname); - offset += size; - - size = 80; - data.ExtractBytes(offset, size, byteorder, pr_psargs); - offset += size; - - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxPrPsInfo::%s Unknown architecture", - __FUNCTION__); - break; + pr_state = data.GetU8(&offset); + pr_sname = data.GetU8(&offset); + pr_zomb = data.GetU8(&offset); + pr_nice = data.GetU8(&offset); + if (data.GetAddressByteSize() == 8) { + // Word align the next field on 64 bit. 
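The 4-byte skip above accounts for the padding an LP64 ABI inserts before the long-sized pr_flag field. A quick way to see the hole (illustrative):

#include <cstddef>
#include <cstdio>

struct PrPsInfo64 {
  char pr_state, pr_sname, pr_zomb, pr_nice;
  // 4 bytes of padding here on LP64: pr_flag needs 8-byte alignment
  unsigned long pr_flag;
};

int main() {
  std::printf("%zu\n", offsetof(PrPsInfo64, pr_flag)); // prints 8 on LP64, not 4
}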
+ offset += 4; } + pr_flag = data.GetPointer(&offset); + + // 16 bit on 32 bit platforms, 32 bit on 64 bit platforms + pr_uid = data.GetMaxU64(&offset, data.GetAddressByteSize() >> 1); + pr_gid = data.GetMaxU64(&offset, data.GetAddressByteSize() >> 1); + + pr_pid = data.GetU32(&offset); + pr_ppid = data.GetU32(&offset); + pr_pgrp = data.GetU32(&offset); + pr_sid = data.GetU32(&offset); + + size = 16; + data.ExtractBytes(offset, size, byteorder, pr_fname); + offset += size; + + size = 80; + data.ExtractBytes(offset, size, byteorder, pr_psargs); + offset += size; + return error; } @@ -329,7 +302,6 @@ ELFLinuxSigInfo::ELFLinuxSigInfo() { Error ELFLinuxSigInfo::Parse(DataExtractor &data, const ArchSpec &arch) { Error error; - ByteOrder byteorder = data.GetByteOrder(); if (GetSize(arch) > data.GetByteSize()) { error.SetErrorStringWithFormat( "NT_SIGINFO size should be %zu, but the remaining bytes are: %" PRIu64, @@ -337,27 +309,12 @@ Error ELFLinuxSigInfo::Parse(DataExtractor &data, const ArchSpec &arch) { return error; } - switch (arch.GetCore()) { - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrStatus), byteorder, this); - break; -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - offset_t offset = 0; - si_signo = data.GetU32(&offset); - si_code = data.GetU32(&offset); - si_errno = data.GetU32(&offset); - - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxSigInfo::%s Unknown architecture", - __FUNCTION__); - break; - } + // Parsing from a 32 bit ELF core file, and populating/reusing the structure + // properly, because the struct is for the 64 bit version + offset_t offset = 0; + si_signo = data.GetU32(&offset); + si_code = data.GetU32(&offset); + si_errno = data.GetU32(&offset); return error; } diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index f97a18448f0a..b84c4a83dee4 100644 --- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -322,16 +322,15 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, switch(CodeID) { default:return nullptr; STRINGIFY_CODE(METADATA, STRING_OLD) - STRINGIFY_CODE(METADATA, STRINGS) - STRINGIFY_CODE(METADATA, NAME) - STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK - STRINGIFY_CODE(METADATA, NODE) STRINGIFY_CODE(METADATA, VALUE) + STRINGIFY_CODE(METADATA, NODE) + STRINGIFY_CODE(METADATA, NAME) + STRINGIFY_CODE(METADATA, DISTINCT_NODE) + STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK + STRINGIFY_CODE(METADATA, LOCATION) STRINGIFY_CODE(METADATA, OLD_NODE) STRINGIFY_CODE(METADATA, OLD_FN_NODE) STRINGIFY_CODE(METADATA, NAMED_NODE) - STRINGIFY_CODE(METADATA, DISTINCT_NODE) - STRINGIFY_CODE(METADATA, LOCATION) STRINGIFY_CODE(METADATA, GENERIC_DEBUG) STRINGIFY_CODE(METADATA, SUBRANGE) STRINGIFY_CODE(METADATA, ENUMERATOR) @@ -353,6 +352,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(METADATA, OBJC_PROPERTY) STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) STRINGIFY_CODE(METADATA, MODULE) + STRINGIFY_CODE(METADATA, MACRO) + STRINGIFY_CODE(METADATA, MACRO_FILE) + STRINGIFY_CODE(METADATA, STRINGS) + STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) + STRINGIFY_CODE(METADATA, 
GLOBAL_VAR_EXPR) STRINGIFY_CODE(METADATA, INDEX_OFFSET) STRINGIFY_CODE(METADATA, INDEX) } diff --git a/contrib/llvm/tools/llvm-link/llvm-link.cpp b/contrib/llvm/tools/llvm-link/llvm-link.cpp index 43431ac3398a..e89696e7e7c2 100644 --- a/contrib/llvm/tools/llvm-link/llvm-link.cpp +++ b/contrib/llvm/tools/llvm-link/llvm-link.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include @@ -202,19 +203,20 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *C) { } /// Import any functions requested via the -import option. -static bool importFunctions(const char *argv0, LLVMContext &Context, - Linker &L) { +static bool importFunctions(const char *argv0, Module &DestModule) { if (SummaryIndex.empty()) return true; std::unique_ptr Index = ExitOnErr(llvm::getModuleSummaryIndexForFile(SummaryIndex)); // Map of Module -> List of globals to import from the Module - std::map> ModuleToGlobalsToImportMap; - auto ModuleLoader = [&Context](const char *argv0, - const std::string &Identifier) { - return loadFile(argv0, Identifier, Context, false); + FunctionImporter::ImportMapTy ImportList; + + auto ModuleLoader = [&DestModule](const char *argv0, + const std::string &Identifier) { + return loadFile(argv0, Identifier, DestModule.getContext(), false); }; + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); for (const auto &Import : Imports) { // Identify the requested function and its bitcode source file. @@ -253,35 +255,14 @@ static bool importFunctions(const char *argv0, LLVMContext &Context, if (Verbose) errs() << "Importing " << FunctionName << " from " << FileName << "\n"; - auto &Entry = ModuleToGlobalsToImportMap[SrcModule.getModuleIdentifier()]; - Entry.insert(F); - - ExitOnErr(F->materialize()); - } - - // Do the actual import of globals now, one Module at a time - for (auto &GlobalsToImportPerModule : ModuleToGlobalsToImportMap) { - // Get the module for the import - auto &GlobalsToImport = GlobalsToImportPerModule.second; - std::unique_ptr SrcModule = - ModuleLoaderCache.takeModule(GlobalsToImportPerModule.first); - assert(&Context == &SrcModule->getContext() && "Context mismatch"); - - // If modules were created with lazy metadata loading, materialize it - // now, before linking it (otherwise this will be a noop). - ExitOnErr(SrcModule->materializeMetadata()); - UpgradeDebugInfo(*SrcModule); - - // Linkage Promotion and renaming - if (renameModuleForThinLTO(*SrcModule, *Index, &GlobalsToImport)) - return true; - - // Instruct the linker to not automatically import linkonce defintion. - unsigned Flags = Linker::Flags::DontForceLinkLinkonceODR; - - if (L.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport)) - return false; + auto &Entry = ImportList[FileName]; + Entry.insert(std::make_pair(F->getGUID(), /* (Unused) threshold */ 1.0)); } + auto CachedModuleLoader = [&](StringRef Identifier) { + return ModuleLoaderCache.takeModule(Identifier); + }; + FunctionImporter Importer(*Index, CachedModuleLoader); + ExitOnErr(Importer.importFunctions(DestModule, ImportList)); return true; } @@ -374,7 +355,7 @@ int main(int argc, char **argv) { return 1; // Import any functions requested via -import - if (!importFunctions(argv[0], Context, L)) + if (!importFunctions(argv[0], *Composite)) return 1; if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
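The reworked importFunctions above hands the heavy lifting to FunctionImporter instead of driving the Linker directly. Its core flow, condensed from the patch itself (not independently buildable; Index, FileName, F, ModuleLoaderCache, and DestModule come from the surrounding llvm-link code):

FunctionImporter::ImportMapTy ImportList;
auto &Entry = ImportList[FileName];              // per-source-module import set
Entry.insert(std::make_pair(F->getGUID(),        // keyed by global GUID; the
                            /*threshold*/ 1.0)); // threshold is unused here

auto CachedModuleLoader = [&](StringRef Identifier) {
  return ModuleLoaderCache.takeModule(Identifier);
};
FunctionImporter Importer(*Index, CachedModuleLoader);
ExitOnErr(Importer.importFunctions(DestModule, ImportList));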