Merge llvm, clang, lld and lldb trunk r291012, and resolve conflicts.

Dimitry Andric 2017-01-04 22:19:42 +00:00
commit 8e0f8b8c96
130 changed files with 2367 additions and 1719 deletions

View File

@ -21,8 +21,8 @@
// class MyClass : public RefCountedBase<MyClass> {};
//
// void foo() {
// // Objects that inherit from RefCountedBase should always be instantiated
// // on the heap, never on the stack.
// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by
// // 1 (from 0 in this case).
// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
//
// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
@ -68,9 +68,6 @@ namespace llvm {
/// calls to Release() and Retain(), which increment and decrement the object's
/// refcount, respectively. When a Release() call decrements the refcount to 0,
/// the object deletes itself.
///
/// Objects that inherit from RefCountedBase should always be allocated with
/// operator new.
template <class Derived> class RefCountedBase {
mutable unsigned RefCount = 0;
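As context for the comment change above: the heap-allocation guidance now lives in the usage example rather than the class doxygen. A minimal sketch of that pattern, reusing the MyClass name from the header's own example:

#include "llvm/ADT/IntrusiveRefCntPtr.h"

class MyClass : public llvm::RefCountedBase<MyClass> {};

void foo() {
  // Refcount goes from 0 to 1; the object lives on the heap, never the stack.
  llvm::IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
  // Copying bumps the refcount to 2; once both pointers are destroyed the
  // count reaches 0 and the object deletes itself.
  llvm::IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
}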

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
@ -107,6 +108,39 @@ public:
return false;
}
/// Insert a sequence of new elements into the PriorityWorklist.
template <typename SequenceT>
typename std::enable_if<!std::is_convertible<SequenceT, T>::value>::type
insert(SequenceT &&Input) {
if (std::begin(Input) == std::end(Input))
// Nothing to do for an empty input sequence.
return;
// First pull the input sequence into the vector as a bulk append
// operation.
ptrdiff_t StartIndex = V.size();
V.insert(V.end(), std::begin(Input), std::end(Input));
// Now walk backwards fixing up the index map and deleting any duplicates.
for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) {
auto InsertResult = M.insert({V[i], i});
if (InsertResult.second)
continue;
// If the existing index is before this insert's start, nuke that one and
// move it up.
ptrdiff_t &Index = InsertResult.first->second;
if (Index < StartIndex) {
V[Index] = T();
Index = i;
continue;
}
// Otherwise the existing one comes first so just clear out the value in
// this slot.
V[i] = T();
}
}
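A hedged usage sketch of the new bulk overload (element values illustrative; pop_back_val() assumed from the class's SetVector-style interface). Per the fix-up loop above, a duplicated element keeps only its most recent, highest-priority slot:

#include "llvm/ADT/PriorityWorklist.h"

void drain() {
  llvm::PriorityWorklist<int> Worklist;
  Worklist.insert(1);
  int NewItems[] = {2, 3, 1, 4};
  Worklist.insert(NewItems); // bulk append; the earlier '1' is superseded
  // Pops come out in priority order: 4, 1, 3, 2.
  while (!Worklist.empty()) {
    int Item = Worklist.pop_back_val();
    (void)Item; // process Item here
  }
}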
/// Remove the last element of the PriorityWorklist.
void pop_back() {
assert(!empty() && "Cannot remove an element when empty!");
@ -169,6 +203,11 @@ public:
return true;
}
/// Reverse the items in the PriorityWorklist.
///
/// This does an in-place reversal. Other kinds of reverse aren't easy to
/// support in the face of the worklist semantics.
/// Completely clear the PriorityWorklist
void clear() {
M.clear();

View File

@ -23,10 +23,9 @@ namespace llvm {
class DataLayout;
class MDNode;
/// isDereferenceablePointer - Return true if this is always a dereferenceable
/// pointer. If the context instruction is specified perform context-sensitive
/// analysis and return true if the pointer is dereferenceable at the
/// specified instruction.
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
/// if the pointer is dereferenceable at the specified instruction.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
/// isSafeToLoadUnconditionally - Return true if we know that executing a load
/// from this value cannot trap.
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
/// analysis and returns true if it is safe to load immediately before ScanFrom.
@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
/// DefMaxInstsToScan - the default number of maximum instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// The default number of maximum instructions to scan in the block, used by
/// FindAvailableLoadedValue().
extern cl::opt<unsigned> DefMaxInstsToScan;
/// \brief Scan backwards to see if we have the value of the given load
/// available locally within a small number of instructions.
/// Scan backwards to see if we have the value of the given load available
/// locally within a small number of instructions.
///
/// You can use this function to scan across multiple blocks: after you call
/// this function, if ScanFrom points at the beginning of the block, it's safe

View File

@ -208,6 +208,8 @@ public:
SledKind Kind;
bool AlwaysInstrument;
const class Function *Fn;
void emit(int, MCStreamer *, const MCSymbol *) const;
};
// All the sleds to be emitted.
@ -216,6 +218,9 @@ public:
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
/// Emit a table with all XRay instrumentation points.
void emitXRayTable();
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//

View File

@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass {
/// such as BB == elt.NewBB.
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
/// The DominatorTreeBase that is used to compute a normal dominator tree
DominatorTreeBase<MachineBasicBlock>* DT;
/// \brief Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
/// the fast query path of DT as much as possible.
@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<MachineBasicBlock>* DT;
MachineDominatorTree();

View File

@ -116,12 +116,12 @@ public:
// An unsigned integer indicating the identity of the source file
// corresponding to a machine instruction.
uint16_t File;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// An unsigned integer representing the DWARF path discriminator value
// for this location.
uint32_t Discriminator;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// A boolean indicating that the current instruction is the beginning of a
// statement.
uint8_t IsStmt:1,

View File

@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id :
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
// The first parameter is the s_sendmsg immediate (i16);
// the second one is copied to m0.
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;

View File

@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_512 :
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x4_256 :
GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_256 :
GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_256 :
GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_256 :
GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_512 :
GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_512 :
GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x8_512 :
GCCBuiltin<"__builtin_ia32_extractf32x8_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x8_512 :
GCCBuiltin<"__builtin_ia32_extracti32x8_mask">,
Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x4_512 :
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x4_512 :
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_insertf32x4_256 :
GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf32x4_512 :
GCCBuiltin<"__builtin_ia32_insertf32x4_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf32x8_512 :
GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x2_256 :
GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x2_512 :
GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x4_512 :
GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x4_256 :
GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x4_512 :
GCCBuiltin<"__builtin_ia32_inserti32x4_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x8_512 :
GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x2_256 :
GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x2_512 :
GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x4_512 :
GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,

View File

@ -769,17 +769,13 @@ namespace detail {
std::error_code directory_iterator_increment(DirIterState &);
std::error_code directory_iterator_destruct(DirIterState &);
/// DirIterState - Keeps state for the directory_iterator. It is reference
/// counted in order to preserve InputIterator semantics on copy.
struct DirIterState : public RefCountedBase<DirIterState> {
DirIterState()
: IterationHandle(0) {}
/// Keeps state for the directory_iterator.
struct DirIterState {
~DirIterState() {
directory_iterator_destruct(*this);
}
intptr_t IterationHandle;
intptr_t IterationHandle = 0;
directory_entry CurrentEntry;
};
} // end namespace detail
@ -788,23 +784,23 @@ namespace detail {
/// operator++ because we need an error_code. If it's really needed we can make
/// it call report_fatal_error on error.
class directory_iterator {
IntrusiveRefCntPtr<detail::DirIterState> State;
std::shared_ptr<detail::DirIterState> State;
public:
explicit directory_iterator(const Twine &path, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
SmallString<128> path_storage;
ec = detail::directory_iterator_construct(*State,
path.toStringRef(path_storage));
}
explicit directory_iterator(const directory_entry &de, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
ec = detail::directory_iterator_construct(*State, de.path());
}
/// Construct end iterator.
directory_iterator() : State(nullptr) {}
directory_iterator() = default;
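A minimal iteration sketch under this API (path illustrative, error handling reduced to a bail-out), using the increment() method declared just below; since copies now share their DirIterState through std::shared_ptr, the InputIterator copy semantics the old comment described are preserved:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

void listDir() {
  std::error_code EC;
  for (llvm::sys::fs::directory_iterator It("/tmp", EC), End;
       It != End && !EC; It.increment(EC))
    llvm::errs() << It->path() << "\n";
}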
// No operator++ because we need error_code.
directory_iterator &increment(std::error_code &ec) {

View File

@ -209,6 +209,15 @@ struct DocumentListTraits {
// static T::value_type& element(IO &io, T &seq, size_t index);
};
/// This class should be specialized by any type that needs to be converted
/// to/from a YAML mapping in the case where the names of the keys are not known
/// in advance, e.g. a string map.
template <typename T>
struct CustomMappingTraits {
// static void inputOne(IO &io, StringRef key, T &elem);
// static void output(IO &io, T &elem);
};
// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;
@ -358,6 +367,23 @@ public:
static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};
// Test if CustomMappingTraits<T> is defined on type T.
template <class T>
struct has_CustomMappingTraits
{
typedef void (*Signature_input)(IO &io, StringRef key, T &v);
template <typename U>
static char test(SameType<Signature_input, &U::inputOne>*);
template <typename U>
static double test(...);
public:
static bool const value =
(sizeof(test<CustomMappingTraits<T>>(nullptr)) == 1);
};
// has_FlowTraits<int> will cause an error with some compilers because
// it subclasses int. Using this wrapper instantiates the real has_FlowTraits
// only if the template type is a class.
@ -493,6 +519,7 @@ struct missingTraits
!has_BlockScalarTraits<T>::value &&
!has_MappingTraits<T, Context>::value &&
!has_SequenceTraits<T>::value &&
!has_CustomMappingTraits<T>::value &&
!has_DocumentListTraits<T>::value> {};
template <typename T, typename Context>
@ -531,6 +558,7 @@ public:
virtual void endMapping() = 0;
virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0;
virtual void postflightKey(void*) = 0;
virtual std::vector<StringRef> keys() = 0;
virtual void beginFlowMapping() = 0;
virtual void endFlowMapping() = 0;
@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
}
}
template <typename T>
typename std::enable_if<has_CustomMappingTraits<T>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
if ( io.outputting() ) {
io.beginMapping();
CustomMappingTraits<T>::output(io, Val);
io.endMapping();
} else {
io.beginMapping();
for (StringRef key : io.keys())
CustomMappingTraits<T>::inputOne(io, key, Val);
io.endMapping();
}
}
template <typename T>
typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
@ -1074,6 +1117,7 @@ private:
void endMapping() override;
bool preflightKey(const char *, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;
@ -1154,10 +1198,8 @@ private:
typedef llvm::StringMap<std::unique_ptr<HNode>> NameToNode;
bool isValidKey(StringRef key);
NameToNode Mapping;
llvm::SmallVector<const char*, 6> ValidKeys;
llvm::SmallVector<std::string, 6> ValidKeys;
};
class SequenceHNode : public HNode {
@ -1215,6 +1257,7 @@ public:
void endMapping() override;
bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;
@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) {
return In;
}
// Define non-member operator>> so that Input can stream in a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Input &>::type
operator>>(Input &In, T &Val) {
EmptyContext Ctx;
if (In.setCurrentDocument())
yamlize(In, Val, true, Ctx);
return In;
}
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) {
return Out;
}
// Define non-member operator<< so that Output can stream out a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Output &>::type
operator<<(Output &Out, T &Val) {
EmptyContext Ctx;
Out.beginDocuments();
if (Out.preflightDocument(0)) {
yamlize(Out, Val, true, Ctx);
Out.postflightDocument();
}
Out.endDocuments();
return Out;
}
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@ -1476,6 +1545,18 @@ template <typename T> struct SequenceTraitsImpl {
}
};
/// Implementation of CustomMappingTraits for std::map<std::string, T>.
template <typename T> struct StdMapStringCustomMappingTraitsImpl {
typedef std::map<std::string, T> map_type;
static void inputOne(IO &io, StringRef key, map_type &v) {
io.mapRequired(key.str().c_str(), v[key]);
}
static void output(IO &io, map_type &v) {
for (auto &p : v)
io.mapRequired(p.first.c_str(), p.second);
}
};
} // end namespace yaml
} // end namespace llvm
@ -1530,4 +1611,15 @@ template <typename T> struct SequenceTraitsImpl {
} \
}
/// Utility for declaring that std::map<std::string, _type> should be considered
/// a YAML map.
#define LLVM_YAML_IS_STRING_MAP(_type) \
namespace llvm { \
namespace yaml { \
template <> \
struct CustomMappingTraits<std::map<std::string, _type>> \
: public StdMapStringCustomMappingTraitsImpl<_type> {}; \
} \
}
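A hedged round-trip sketch of the new macro (map contents illustrative; assumes yaml::Input and yaml::Output behave as declared above):

#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>

LLVM_YAML_IS_STRING_MAP(int)

void roundTrip() {
  std::map<std::string, int> Counts = {{"bar", 2}, {"foo", 1}};
  std::string YAML;
  llvm::raw_string_ostream OS(YAML);
  llvm::yaml::Output Out(OS);
  Out << Counts; // CustomMappingTraits::output() walks the map in key order

  std::map<std::string, int> Parsed;
  llvm::yaml::Input In(OS.str());
  In >> Parsed; // Input::keys() feeds each document key to inputOne()
}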
#endif // LLVM_SUPPORT_YAMLTRAITS_H

View File

@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.
@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.

View File

@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// handles the case where this is type ODRed with a definition needed
// by the importing module, in which case the existing definition is
// used.
if (IsImporting && !ImportFullTypeDefinitions &&
if (IsImporting && !ImportFullTypeDefinitions && Identifier &&
(Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type ||

View File

@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V,
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
NoopInput = Op;
} else if (isa<CallInst>(I)) {
// Look through call (skipping callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (isa<InvokeInst>(I)) {
// Look through invoke (skipping BB, BB, Callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (auto CS = ImmutableCallSite(I)) {
const Value *ReturnedOp = CS.getReturnedArgOperand();
if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
NoopInput = ReturnedOp;
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
// Value may come from either the aggregate or the scalar
ArrayRef<unsigned> InsertLoc = IVI->getIndices();

View File

@ -37,6 +37,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {}
void AsmPrinterHandler::markFunctionEnd() {}
// In the binary's "xray_instr_map" section, an array of these function entries
// describes each instrumentation point. When XRay patches your code, the index
// into this table will be given to your handler as a patch point identifier.
void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
const MCSymbol *CurrentFnSym) const {
Out->EmitSymbolValue(Sled, Bytes);
Out->EmitSymbolValue(CurrentFnSym, Bytes);
auto Kind8 = static_cast<uint8_t>(Kind);
Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
Out->EmitBytes(
StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries
}
void AsmPrinter::emitXRayTable() {
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since this function is always called just
// before the function's end, we assume that this is happening after
// the last return instruction.
auto WordSizeBytes = TM.getPointerSize();
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
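For reference, the bytes emit() writes per sled add up to four machine words. An assumed 64-bit layout (struct and field names are illustrative, not from an LLVM header):

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t Address;         // EmitSymbolValue(Sled, Bytes)
  uint64_t Function;        // EmitSymbolValue(CurrentFnSym, Bytes)
  uint8_t Kind;             // one byte from the SledKind enum
  uint8_t AlwaysInstrument; // one byte
  uint8_t Padding[14];      // EmitZeros(2 * Bytes - 2)
};
static_assert(sizeof(XRaySledEntry64) == 32, "four 8-byte words per entry");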
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();

View File

@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills(
// earlier spill with smaller SlotIndex.
for (const auto CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
MachineDomTreeNode *Node = MDT.DT->getNode(Block);
MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
if (PrevSpill) {
SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills(
MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
SpillsToRm.push_back(SpillToRm);
SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
} else {
SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
}
}
for (const auto SpillToRm : SpillsToRm)
@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders(
// Sort the nodes in WorkSet in top-down order and save the nodes
// in Orders. Orders will be used for hoisting in runHoistSpills.
unsigned idx = 0;
Orders.push_back(MDT.DT->getNode(Root));
Orders.push_back(MDT.getBase().getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();

View File

@ -4277,7 +4277,8 @@ struct BaseIndexOffset {
}
/// Parses tree in Ptr for base, index, offset addresses.
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
int64_t PartialOffset = 0) {
bool IsIndexSignExt = false;
// Split up a folded GlobalAddress+Offset into its component parts.
@ -4286,7 +4287,7 @@ struct BaseIndexOffset {
return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
SDLoc(GA),
GA->getValueType(0),
/*Offset=*/0,
/*Offset=*/PartialOffset,
/*isTargetGA=*/false,
GA->getTargetFlags()),
SDValue(),
@ -4298,14 +4299,13 @@ struct BaseIndexOffset {
// instruction, then it could be just the BASE or everything else we don't
// know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// We know that we have at least an ADD instruction. Try to pattern match
// the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
}
// Inside a loop the current BASE pointer is calculated using an ADD and a
@ -4314,7 +4314,7 @@ struct BaseIndexOffset {
// (i64 mul (i64 %induction_var)
// (i64 %element_size)))
if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
@ -4328,14 +4328,14 @@ struct BaseIndexOffset {
// Either the case of Base + Index (no offset) or something else.
if (IndexOffset->getOpcode() != ISD::ADD)
return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
// Now we have the case of Base + Index + offset.
SDValue Index = IndexOffset->getOperand(0);
SDValue Offset = IndexOffset->getOperand(1);
if (!isa<ConstantSDNode>(Offset))
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// Ignore signextends.
if (Index->getOpcode() == ISD::SIGN_EXTEND) {
@ -4344,7 +4344,7 @@ struct BaseIndexOffset {
} else IsIndexSignExt = false;
int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
}
};
} // namespace
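Stripped of the SelectionDAG plumbing, the change threads constant offsets through the recursion instead of stopping at the first BASE + OFFSET match. A plain-C++ sketch of that accumulation (types and names hypothetical):

#include <cstdint>

struct AddNode {
  const AddNode *Base = nullptr; // nullptr marks a leaf base pointer
  int64_t Imm = 0;               // constant operand of this ADD
};

int64_t matchOffset(const AddNode &N, int64_t PartialOffset = 0) {
  if (N.Base) // (add Base, Imm): fold Imm and keep decomposing Base
    return matchOffset(*N.Base, PartialOffset + N.Imm);
  return PartialOffset; // leaf: return everything folded so far
}
// For (add (add Base, 8), 16) this yields 24, mirroring how match() now
// produces a single BaseIndexOffset for chained constant adds.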

View File

@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted(
// Use symbol info to iterate functions in the object.
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
SymbolRef Sym = P.first;
if (Sym.getType() != SymbolRef::ST_Function)
if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function)
continue;
ErrorOr<StringRef> NameOrErr = Sym.getName();
if (NameOrErr.getError())
Expected<StringRef> NameOrErr = Sym.getName();
if (!NameOrErr)
continue;
StringRef Name = *NameOrErr;
ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
if (AddrOrErr.getError())
Expected<uint64_t> AddrOrErr = Sym.getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
uint64_t Size = P.second;
@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
for (symbol_iterator I = DebugObj.symbol_begin(),
E = DebugObj.symbol_end();
I != E; ++I) {
if (I->getType() == SymbolRef::ST_Function) {
ErrorOr<uint64_t> AddrOrErr = I->getAddress();
if (AddrOrErr.getError())
if (I->getType() && *I->getType() == SymbolRef::ST_Function) {
Expected<uint64_t> AddrOrErr = I->getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
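The hunks above migrate from ErrorOr to Expected. One caveat, stated as an assumption rather than something this diff shows: under LLVM's checked-error builds, an Expected that holds an error should be consumed, so a defensive variant of the pattern looks like this (hypothetical helper):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"

// Returns an empty name instead of propagating the error.
llvm::StringRef nameOrEmpty(const llvm::object::SymbolRef &Sym) {
  llvm::Expected<llvm::StringRef> NameOrErr = Sym.getName();
  if (!NameOrErr) {
    llvm::consumeError(NameOrErr.takeError()); // deliberately drop the error
    return llvm::StringRef();
  }
  return *NameOrErr;
}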

View File

@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128")) {
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;
// Extend the second operand into a vector that is twice as big.
// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
// The low half of the result is either the low half of the 1st operand
// or the low half of the 2nd operand (the inserted vector).
for (unsigned i = 0; i != NumElts / 2; ++i)
Idxs[i] = Imm ? i : (i + NumElts);
// The high half of the result is either the low half of the 2nd operand
// (the inserted vector) or the high half of the 1st operand.
for (unsigned i = NumElts / 2; i != NumElts; ++i)
Idxs[i] = Imm ? (i + NumElts / 2) : i;
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128")) {
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;
// Get indexes for either the high half or low half of the input vector.
SmallVector<uint32_t, 4> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
Idxs[i] = Imm ? (i + NumElts) : i;
// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Value *UndefV = UndefValue::get(Op0->getType());
Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||

View File

@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
// Partition numbers for ThinLTO jobs start at 1 (see comments for
// GlobalResolution in LTO.h). Task numbers, however, start at
// ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
// through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
// generation partitions.
// Task numbers start at ParallelCodeGenParallelismLevel if an LTO
// module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
// are reserved for parallel code generation partitions.
unsigned Task =
HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0;
unsigned Partition = 1;
for (auto &Mod : ThinLTO.ModuleMap) {
if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
ExportLists[Mod.first],
ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
++Task;
++Partition;
}
return BackendProc->wait();

View File

@ -76,8 +76,12 @@ namespace llvm {
compile-time arithmetic on PPC double-double numbers, it is not able
to represent all possible values held by a PPC double-double number,
for example: (long double) 1.0 + (long double) 0x1p-106
Should this be replaced by a full emulation of PPC double-double? */
static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0};
Should this be replaced by a full emulation of PPC double-double?
Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
/* There are temporary semantics for the real PPCDoubleDouble implementation.
Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the

View File

@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() {
.Case("POWER7", "pwr7")
.Case("POWER8", "pwr8")
.Case("POWER8E", "pwr8")
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
.Default(generic);
}

View File

@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
N *= 100.0;
char Buf[32];
unsigned Len;
Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
if (Style == FloatStyle::Percent)
++Len;
format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
S << Buf;
if (Style == FloatStyle::Percent)
S << '%';
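The dropped ++Len compensated for the '%' before it was actually written; since Len was otherwise unused, the call now simply fills Buf. Expected behavior, assuming the style's default precision of two digits:

#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

void demo() {
  std::string Str;
  llvm::raw_string_ostream S(Str);
  llvm::write_double(S, 0.5, llvm::FloatStyle::Percent);
  // S.str() == "50.00%": scaled by 100, formatted, then '%' appended.
}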

View File

@ -118,6 +118,18 @@ void Input::beginMapping() {
}
}
std::vector<StringRef> Input::keys() {
MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
std::vector<StringRef> Ret;
if (!MN) {
setError(CurrentNode, "not a mapping");
return Ret;
}
for (auto &P : MN->Mapping)
Ret.push_back(P.first());
return Ret;
}
bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
void *&SaveInfo) {
UseDefault = false;
@ -163,7 +175,7 @@ void Input::endMapping() {
if (!MN)
return;
for (const auto &NN : MN->Mapping) {
if (!MN->isValidKey(NN.first())) {
if (!is_contained(MN->ValidKeys, NN.first())) {
setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'");
break;
}
@ -373,14 +385,6 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
}
bool Input::MapHNode::isValidKey(StringRef Key) {
for (const char *K : ValidKeys) {
if (Key.equals(K))
return true;
}
return false;
}
void Input::setError(const Twine &Message) {
this->setError(CurrentNode, Message);
}
@ -451,6 +455,10 @@ void Output::endMapping() {
StateStack.pop_back();
}
std::vector<StringRef> Output::keys() {
report_fatal_error("invalid call");
}
bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&) {
UseDefault = false;

View File

@ -11,9 +11,15 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/StringMatcher.h"
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
/// FindFirstNonCommonLetter - Find the first character in the keys of the
@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
}
// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*> > MatchesByLetter;
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
// FIXME: Need to escape general strings.
OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
<< ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
<< NumChars << "))\n";
<< NumChars << ") != 0)\n";
OS << Indent << " break;\n";
}
@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
OS << Indent << "default: break;\n";
for (std::map<char, std::vector<const StringPair*> >::iterator LI =
for (std::map<char, std::vector<const StringPair*>>::iterator LI =
MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
// TODO: escape hard stuff (like \n) if we ever care about it.
OS << Indent << "case '" << LI->first << "':\t // "
@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
return true;
}
/// Emit - Top level entry point.
///
void StringMatcher::Emit(unsigned Indent) const {
@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const {
if (Matches.empty()) return;
// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const {
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
OS.indent(Indent*2+2) << "default: break;\n";
for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
<< LI->second.size()
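For orientation, the code these loops emit for the matches {"add", "sub"} has roughly the following shape (illustrative only, including the new explicit != 0 comparison; the enum and wrapper function are hypothetical):

#include "llvm/ADT/StringRef.h"
#include <cstring>

enum class Opcode { Add, Sub, Unknown };

Opcode lookup(llvm::StringRef Str) {
  switch (Str.size()) {
  default: break;
  case 3:  // 2 strings to match.
    switch (Str[0]) {
    default: break;
    case 'a':  // 1 string to match.
      if (memcmp(Str.data()+1, "dd", 2) != 0)
        break;
      return Opcode::Add;  // "add"
    case 's':  // 1 string to match.
      if (memcmp(Str.data()+1, "ub", 2) != 0)
        break;
      return Opcode::Sub;  // "sub"
    }
    break;
  }
  return Opcode::Unknown;
}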

View File

@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing
]>;
def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",

View File

@ -76,7 +76,6 @@ public:
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
void EmitXRayTable();
void EmitSled(const MachineInstr &MI, SledKind Kind);
/// \brief tblgen'erated driver function for lowering simple MI->MC
@ -95,7 +94,7 @@ public:
AArch64FI = F.getInfo<AArch64FunctionInfo>();
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
bool Result = AsmPrinter::runOnMachineFunction(F);
EmitXRayTable();
emitXRayTable();
return Result;
}
@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
EmitSled(MI, SledKind::TAIL_CALL);
}
void AArch64AsmPrinter::EmitXRayTable()
{
//TODO: merge the logic for ELF XRay sleds at a higher level, to avoid
// the code duplication that currently exists across x86_64, ARM32 and AArch64.
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section;
if (STI->isTargetELF()) {
if (Fn->hasComdat())
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
else
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
} else if (STI->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since EmitXRayTable() is always called just
// before the function's end, we assume that this is happening after the
// last return instruction.
//
// We then align the reference to 16 byte boundaries, which we determined
// experimentally to be beneficial to avoid causing decoder stalls.
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, 8, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(14);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
{
static const int8_t NoopsInSledCount = 7;

View File

@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
bool IsUnscaled = TII->isUnscaledLdSt(MI);
int Offset = getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
Offset -= OffsetStride;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return false;

View File

@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)

View File

@ -313,6 +313,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
SENDMSG,
SENDMSGHALT,
INTERP_MOV,
INTERP_P1,
INTERP_P2,

View File

@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;

View File

@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::SI_sendmsg: {
case AMDGPUIntrinsic::SI_sendmsg:
case Intrinsic::amdgcn_s_sendmsg: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case Intrinsic::amdgcn_s_sendmsghalt: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case AMDGPUIntrinsic::SI_tbuffer_store: {
SDValue Ops[] = {
Chain,

View File

@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG ||
                         I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG)
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
else
Required = handleOperands(*I);

View File

@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(AMDGPUsendmsg (i32 imm:$simm16))]
>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
[(AMDGPUsendmsghalt (i32 imm:$simm16))]
>;
} // End Uses = [EXEC, M0]
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;

View File

@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
// Emit the XRay table for this function.
EmitXRayTable();
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.

View File

@ -113,9 +113,6 @@ public:
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();
private:
void EmitSled(const MachineInstr &MI, SledKind Kind);

View File

@ -22,9 +22,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
{
EmitSled(MI, SledKind::TAIL_CALL);
}
void ARMAsmPrinter::EmitXRayTable()
{
if (Sleds.empty())
return;
MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP |
ELF::SHF_MERGE,
0, CurrentFnSym->getName());
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
auto PrevSection = OutStreamer->getCurrentSectionOnly();
OutStreamer->SwitchSection(Section);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 4);
OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(6);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}

View File

@ -53,28 +53,36 @@
//
// The code below is intended to be fully target-independent.
#include "BitTracker.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "BitTracker.h"
#include <cassert>
#include <cstdint>
#include <iterator>
using namespace llvm;
typedef BitTracker BT;
namespace {
// Local trickery to pretty print a register (without the whole "%vreg"
// business).
struct printv {
printv(unsigned r) : R(r) {}
unsigned R;
};
raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
if (PV.R)
OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
@ -82,9 +90,11 @@ namespace {
OS << 's';
return OS;
}
}
} // end anonymous namespace
namespace llvm {
raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) {
switch (BV.Type) {
case BT::BitValue::Top:
@ -167,14 +177,14 @@ namespace llvm {
return OS;
}
}
} // end namespace llvm
void BitTracker::print_cells(raw_ostream &OS) const {
for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
}
BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
: Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
@ -182,7 +192,6 @@ BitTracker::~BitTracker() {
delete &Map;
}
// If we were allowed to update a cell for a part of a register, the meet
// operation would need to be parametrized by the register number and the
// exact part of the register, so that the computer BitRefs correspond to
@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
return Changed;
}
// Insert the entire cell RC into the current cell at position given by M.
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {
@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
return *this;
}
BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
uint16_t B = M.first(), E = M.last(), W = width();
assert(B < W && E < W);
@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
return RC;
}
BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
// Rotate left (i.e. towards increasing bit indices).
// Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]
@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
return *this;
}
BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
const BitValue &V) {
assert(B <= E);
@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
return *this;
}
BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
// Append the cell given as the argument to the "this" cell.
// Bit 0 of RC becomes bit W of the result, where W is this->width().
@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
return *this;
}
uint16_t BT::RegisterCell::ct(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const {
return C;
}
uint16_t BT::RegisterCell::cl(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const {
return C;
}
bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
uint16_t W = Bits.size();
if (RC.Bits.size() != W)
@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}
uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and
@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
return BW;
}
BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
const CellMapType &M) const {
uint16_t BW = getRegBitWidth(RR);
@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
return RegisterCell::top(BW);
}
void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
CellMapType &M) const {
// While updating the cell map can be done in a meaningful way for
@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
M[RR.Reg] = RC;
}
// Check if the cell represents a compile-time integer value.
bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
uint16_t W = A.width();
@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
return true;
}
// Convert a cell to the integer value. The result must fit in uint64_t.
uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
assert(isInt(A));
@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
return Val;
}
// Evaluator helper functions. These implement some common operation on
// register cells that can be used to implement target-specific instructions
// in a target-specific evaluator.
@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
const APInt &A = CI->getValue();
uint16_t BW = A.getBitWidth();
@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
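The switch to ct(false) makes the bool argument explicit: it counts known trailing Zero bits. Both multiply evaluators rely on the fact that a product has at least as many trailing zeros as its operands combined; a quick illustration with plain integers (not the RegisterCell machinery):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x14; // 0b10100: 2 trailing zeros
  uint32_t B = 0x18; // 0b11000: 3 trailing zeros
  // The low 2 + 3 = 5 bits of the product are guaranteed to be zero.
  assert(((A * B) & 0x1Fu) == 0);
  return 0;
}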
BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
uint16_t Sh) const {
assert(Sh <= A1.width());
@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
uint16_t W = A1.width();
RegisterCell Res(W);
@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.cl(B), AW = A1.width();
@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.ct(B), AW = A1.width();
@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
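// Sketch of the intended behavior (for a hypothetical cell A): eCLB(A,
// false, 32) yields the number of leading zeros of A as a 32-bit immediate
// cell when that count is fully determined by A's known bits, and an opaque
// self-referential cell otherwise; eCTB does the same for trailing bits.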
BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
return Res;
}
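// For instance, eZXT(A, 8) keeps bits [0..7] of A and forces bits [8..W)
// to Zero, while eSXT(A, 8) replicates bit 7 of A across bits [8..W).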
BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
uint16_t B, uint16_t E) const {
uint16_t W = A1.width();
@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
const RegisterCell &A2, uint16_t AtN) const {
uint16_t W1 = A1.width(), W2 = A2.width();
@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
return Res;
}
BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
uint16_t W = getRegBitWidth(Reg);
@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
return true;
}
// Main W-Z implementation.
void BT::visitPHI(const MachineInstr &PI) {
@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
}
}
void BT::visitUsesOf(unsigned Reg) {
if (Trace)
dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) {
}
}
BT::RegisterCell BT::get(RegisterRef RR) const {
return ME.getCell(RR, Map);
}
void BT::put(RegisterRef RR, const RegisterCell &RC) {
ME.putCell(RR, RC, Map);
}
// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
}
}
// Check if the block has been "executed" during propagation. (If not, the
// block is dead, but it may still appear to be reachable.)
bool BT::reached(const MachineBasicBlock *B) const {
@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const {
return false;
}
// Visit an individual instruction. This could be a newly added instruction,
// or one that has been modified by an optimization.
void BT::visit(const MachineInstr &MI) {
@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) {
FlowQ.pop();
}
void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
}
void BT::run() {
reset();
assert(FlowQ.empty());
@ -1141,4 +1106,3 @@ void BT::run() {
if (Trace)
print_cells(dbgs() << "Cells after propagation:\n");
}


@ -1,4 +1,4 @@
//===--- BitTracker.h -----------------------------------------------------===//
//===--- BitTracker.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,24 +7,27 @@
//
//===----------------------------------------------------------------------===//
#ifndef BITTRACKER_H
#define BITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>
#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <utility>
namespace llvm {
class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class MachineOperand;
class raw_ostream;
class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class raw_ostream;
struct BitTracker {
struct BitRef;
@ -76,19 +79,19 @@ private:
CellMapType &Map;
};
// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
}
unsigned Reg;
uint16_t Pos;
};
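// For example (R being any nonzero register number):
//   BitRef(0, 3) == BitRef(0, 7)   // true: Reg == 0, so Pos is disregarded
//   BitRef(R, 3) == BitRef(R, 7)   // false: the positions differ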
// Abstraction of a register reference in MachineOperand. It contains the
// register number and the subregister index.
struct BitTracker::RegisterRef {
@ -96,10 +99,10 @@ struct BitTracker::RegisterRef {
: Reg(R), Sub(S) {}
RegisterRef(const MachineOperand &MO)
: Reg(MO.getReg()), Sub(MO.getSubReg()) {}
unsigned Reg, Sub;
};
// Value that a single bit can take. This is outside of the context of
// any register, it is more of an abstraction of the two-element set of
// possible bit values. One extension here is the "Ref" type, which
@ -158,6 +161,7 @@ struct BitTracker::BitValue {
bool operator!= (const BitValue &V) const {
return !operator==(V);
}
bool is(unsigned T) const {
assert(T == 0 || T == 1);
return T == 0 ? Type == Zero
@ -209,6 +213,7 @@ struct BitTracker::BitValue {
bool num() const {
return Type == Zero || Type == One;
}
operator bool() const {
assert(Type == Zero || Type == One);
return Type == One;
@ -217,7 +222,6 @@ struct BitTracker::BitValue {
friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
};
// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
inline BitTracker::BitValue
BitTracker::BitValue::ref(const BitValue &V) {
@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) {
return self();
}
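// Illustration: if V refers to bit 5 of register R, then ref(V) is again a
// reference to bit 5 of R, so ref(ref(V)) == ref(V) as required above.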
inline BitTracker::BitValue
BitTracker::BitValue::self(const BitRef &Self) {
return BitValue(Self.Reg, Self.Pos);
}
// A sequence of bits starting from index B up to and including index E.
// If E < B, the mask represents two sections: [0..E] and [B..W) where
// W is the width of the register.
struct BitTracker::BitMask {
BitMask() : B(0), E(0) {}
BitMask() = default;
BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
uint16_t first() const { return B; }
uint16_t last() const { return E; }
private:
uint16_t B, E;
};
private:
uint16_t B = 0;
uint16_t E = 0;
};
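// Example: on a 32-bit register, BitMask(8, 15) covers the contiguous bits
// [8..15], while BitMask(24, 7) (E < B) covers the two sections [0..7] and
// [24..31].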
// Representation of a register: a list of BitValues.
struct BitTracker::RegisterCell {
@ -255,6 +259,7 @@ struct BitTracker::RegisterCell {
uint16_t width() const {
return Bits.size();
}
const BitValue &operator[](uint16_t BitN) const {
assert(BitN < Bits.size());
return Bits[BitN];
@ -297,12 +302,10 @@ private:
friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
};
inline bool BitTracker::has(unsigned Reg) const {
return Map.find(Reg) != Map.end();
}
inline const BitTracker::RegisterCell&
BitTracker::lookup(unsigned Reg) const {
CellMapType::const_iterator F = Map.find(Reg);
@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const {
return F->second;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
RegisterCell RC(Width);
@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
return RC;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::top(uint16_t Width) {
RegisterCell RC(Width);
@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) {
return RC;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::ref(const RegisterCell &C) {
uint16_t W = C.width();
@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
struct BitTracker::MachineEvaluator {
MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
: TRI(T), MRI(M) {}
virtual ~MachineEvaluator() {}
virtual ~MachineEvaluator() = default;
uint16_t getRegBitWidth(const RegisterRef &RR) const;
RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
// A result of any operation should use refs to the source cells, not
// the cells directly. This function is a convenience wrapper to quickly
// generate a ref for a cell corresponding to a register reference.
@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator {
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H


@ -7,16 +7,30 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "Hexagon.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetMachine.h"
#include "HexagonBitTracker.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <utility>
#include <vector>
using namespace llvm;
@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
}
}
BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
using namespace Hexagon;
if (Sub == 0)
return MachineEvaluator::mask(Reg, 0);
using namespace Hexagon;
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned ID = RC->getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
}
namespace {
class RegisterRefs {
std::vector<BT::RegisterRef> Vector;
@ -117,17 +132,21 @@ public:
}
size_t size() const { return Vector.size(); }
const BT::RegisterRef &operator[](unsigned n) const {
// The main purpose of this operator is to assert on a bad argument.
assert(n < Vector.size());
return Vector[n];
}
};
}
} // end anonymous namespace
bool HexagonEvaluator::evaluate(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;
unsigned NumDefs = 0;
// Sanity verification: there should not be any defs with subregisters.
@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;
using namespace Hexagon;
unsigned Opc = MI.getOpcode();
if (MI.mayLoad()) {
@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case S2_cl0:
case S2_cl0p:
// Always produce a 32-bit result.
return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs);
case S2_cl1:
case S2_cl1p:
return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs);
case S2_clb:
case S2_clbp: {
uint16_t W1 = getRegBitWidth(Reg[1]);
@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
case S2_ct0:
case S2_ct0p:
return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs);
case S2_ct1:
case S2_ct1p:
return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs);
case S5_popcountp:
// TODO
break;
@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;
if (TII.isPredicated(MI))
return false;
assert(MI.mayLoad() && "A load that mayn't?");
@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
uint16_t BitNum;
bool SignEx;
using namespace Hexagon;
switch (Opc) {
default:
@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
return true;
}
unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
using namespace Hexagon;
bool Is64 = DoubleRegsRegClass.contains(PReg);
assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
}
unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
typedef MachineRegisterInfo::livein_iterator iterator;
for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {


@ -1,4 +1,4 @@
//===--- HexagonBitTracker.h ----------------------------------------------===//
//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//
#ifndef HEXAGONBITTRACKER_H
#define HEXAGONBITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#include "BitTracker.h"
#include "llvm/ADT/DenseMap.h"
#include <cstdint>
namespace llvm {
class HexagonInstrInfo;
class HexagonRegisterInfo;
class HexagonInstrInfo;
class HexagonRegisterInfo;
struct HexagonEvaluator : public BitTracker::MachineEvaluator {
typedef BitTracker::CellMapType CellMapType;
@ -49,10 +51,12 @@ private:
// Type of formal parameter extension.
struct ExtType {
enum { SExt, ZExt };
char Type;
uint16_t Width;
ExtType() : Type(0), Width(0) {}
ExtType() = default;
ExtType(char t, uint16_t w) : Type(t), Width(w) {}
char Type = 0;
uint16_t Width = 0;
};
// Map VR -> extension type.
typedef DenseMap<unsigned, ExtType> RegExtMap;
@ -61,4 +65,4 @@ private:
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H

File diff suppressed because it is too large


@ -16,9 +16,14 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <cstdint>
#include <vector>
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@ -29,9 +34,10 @@ struct EVT;
class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
virtual void anchor();
public:
explicit HexagonInstrInfo(HexagonSubtarget &ST);
@ -260,7 +266,7 @@ public:
/// PredCost.
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr &MI,
unsigned *PredCost = 0) const override;
unsigned *PredCost = nullptr) const override;
/// Create machine specific model for scheduling.
DFAPacketizer *
@ -378,7 +384,6 @@ public:
bool PredOpcodeHasJMP_c(unsigned Opcode) const;
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
short getAbsoluteForm(const MachineInstr &MI) const;
unsigned getAddrMode(const MachineInstr &MI) const;
unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
@ -421,13 +426,11 @@ public:
unsigned getUnits(const MachineInstr &MI) const;
unsigned getValidSubTargets(const unsigned Opcode) const;
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
void immediateExtend(MachineInstr &MI) const;
bool invertAndChangeJumpTarget(MachineInstr &MI,
MachineBasicBlock* NewTarget) const;
@ -438,6 +441,6 @@ public:
short xformRegToImmOffset(const MachineInstr &MI) const;
};
}
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H


@ -15,33 +15,31 @@
namespace llvm {
namespace Hexagon {
namespace Hexagon {
const unsigned int StartPacket = 0x1;
const unsigned int EndPacket = 0x2;
}
} // end namespace Hexagon
/// Hexagon target-specific information for each MachineFunction.
class HexagonMachineFunctionInfo : public MachineFunctionInfo {
// SRetReturnReg - Some subtargets require that sret lowering includes
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
unsigned SRetReturnReg;
unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg; // (physical)
unsigned SRetReturnReg = 0;
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg = 0; // (physical)
int VarArgsFrameIndex;
bool HasClobberLR;
bool HasEHReturn;
bool HasClobberLR = false;
bool HasEHReturn = false;
std::map<const MachineInstr*, unsigned> PacketInfo;
virtual void anchor();
public:
HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0),
StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {}
HexagonMachineFunctionInfo() = default;
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0),
HasEHReturn(false) {}
HexagonMachineFunctionInfo(MachineFunction &MF) {}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@ -75,6 +73,7 @@ public:
void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
};
} // End llvm namespace
#endif
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H


@ -10,17 +10,27 @@
// This file contains the declarations of the HexagonTargetAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-sdata"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
// (e.g. -debug and -debug-only=globallayout)
#define TRACE_TO(s, X) s << X
#ifdef NDEBUG
#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
#define TRACE(X) \
do { \
if (TraceGVPlacement) { \
TRACE_TO(errs(), X); \
} \
} while (false)
#else
#define TRACE(X) \
do { \
if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
else { DEBUG( TRACE_TO(dbgs(), X) ); } \
} while (0)
#define TRACE(X) \
do { \
if (TraceGVPlacement) { \
TRACE_TO(errs(), X); \
} else { \
DEBUG(TRACE_TO(dbgs(), X)); \
} \
} while (false)
#endif
// Returns true if the section name is such that the symbol will be put
@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
Sec.find(".scommon.") != StringRef::npos;
}
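// For example, a section whose name contains ".scommon." (such as
// ".scommon.4.bar") is treated as a small-data section.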
static const char *getSectionSuffixForSize(unsigned Size) {
switch (Size) {
default:
@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
}
/// Return true if this global value should be placed into small data/bss
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
bool HexagonTargetObjectFile::isSmallDataEnabled() const {
return SmallDataThreshold > 0;
}
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
return SmallDataThreshold;
}
/// Descends any type down to "elementary" components,
/// discovering the smallest addressable one.
/// If zero is returned, the declaration will not be modified.


@ -1,5 +1,4 @@
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -11,18 +10,17 @@
// This file looks at a packet and tries to form compound insns
//
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
using namespace Hexagon;
@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
};
// enum HexagonII::CompoundGroup
namespace {
unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
switch (MI.getOpcode()) {
@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return HexagonII::HCG_None;
}
}
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
namespace {
unsigned getCompoundOp(MCInst const &HMCI) {
static unsigned getCompoundOp(MCInst const &HMCI) {
const MCOperand &Predicate = HMCI.getOperand(0);
unsigned PredReg = Predicate.getReg();
@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
}
}
}
namespace {
MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
MCInst *CompoundInsn = 0;
static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
MCInst const &R) {
MCInst *CompoundInsn = nullptr;
unsigned compoundOpcode;
MCOperand Rs, Rt;
int64_t Value;
@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
return CompoundInsn;
}
}
/// Non-symmetrical. See if these two instructions are fit to form a compound pair.
namespace {
bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
MCInst const &MIb, bool IsExtendedB) {
static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
MCInst const &MIb, bool IsExtendedB) {
unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
// We have two candidates - check that this is the same register
@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
}
}
namespace {
bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
MCInst &MCI) {
assert(HexagonMCInstrInfo::isBundle(MCI));
bool JExtended = false;
for (MCInst::iterator J =
@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
JExtended = true;
continue;
}
if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
HexagonII::TypeJ) {
if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
// Try to pair with another insn (B)undled with jump.
bool BExtended = false;
for (MCInst::iterator B =
@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
}
return false;
}
}
/// tryCompound - Given a bundle, check for compound insns; when one
/// is found, update the contents of the bundle with the compound insn.
@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
// a compound is found.
while (lookForCompound(MCII, Context, MCI))
;
return;
}


@ -1,4 +1,4 @@
//===--- RDFCopy.h --------------------------------------------------------===//
//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,23 +7,26 @@
//
//===----------------------------------------------------------------------===//
#ifndef RDF_COPY_H
#define RDF_COPY_H
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
#include <map>
#include <vector>
namespace llvm {
class MachineBasicBlock;
class MachineDominatorTree;
class MachineInstr;
namespace rdf {
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
Trace(false) {}
virtual ~CopyPropagation() {}
virtual ~CopyPropagation() = default;
bool run();
void trace(bool On) { Trace = On; }
@ -49,7 +52,9 @@ namespace rdf {
void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
};
} // namespace rdf
} // namespace llvm
#endif
} // end namespace rdf
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H


@ -10,16 +10,31 @@
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <utility>
#include <vector>
using namespace llvm;
using namespace rdf;
@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
return OS;
}
namespace {
void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
OS << Print<NodeId>(RA.Id, G) << '<'
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
OS << Print<NodeId>(RA.Id, G) << '<'
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
template<>
@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
}
namespace {
template <typename T>
struct PrintListV {
PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
typedef T Type;
const NodeList &List;
const DataFlowGraph &G;
@ -201,7 +216,8 @@ namespace {
}
return OS;
}
}
} // end anonymous namespace
template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
find_if(MI.operands(),
[] (const MachineOperand &Op) -> bool {
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
});
llvm::find_if(MI.operands(),
[] (const MachineOperand &Op) -> bool {
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
});
if (T != MI.operands_end()) {
OS << ' ';
if (T->isMBB())
@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
} // namespace rdf
} // namespace llvm
} // end namespace rdf
} // end namespace llvm
// Node allocation functions.
//
@ -390,7 +406,6 @@ void NodeAllocator::clear() {
ActiveEnd = nullptr;
}
// Insert node NA after "this" in the circular chain.
void NodeBase::append(NodeAddr<NodeBase*> NA) {
NodeId Nx = Next;
@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
}
}
// Fundamental node manipulator functions.
// Obtain the register reference from a reference node.
@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
return findBlock(EntryB, G);
}
// Target operand information.
//
@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return false;
}
RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
RegisterId SuperReg = RR.Reg;
while (true) {
@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
OS << " }";
}
//
// The data flow graph construction.
//
@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
: LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
: MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
}
// The implementation of the definition stack.
// Each register reference has its own definition stack. In particular,
// for a register, references "Reg" and "Reg:subreg" will each have their
@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
return P;
}
// Register information.
// Get the list of references aliased to RR. Lane masks are ignored.
@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
return NA;
}
// Allocation routines for specific node types/kinds.
NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
return false;
}
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
Func = NodeAddr<FuncNode*>();
}
// Return the next reference node in the instruction node IA that is related
// to RA. Conceptually, two reference nodes are related if they refer to the
// same instance of a register access, but differ in flags or other minor


@ -1,4 +1,4 @@
//===--- RDFGraph.h -------------------------------------------------------===//
//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -221,20 +221,25 @@
// The statement s5 has two use nodes for t0: u7" and u9". The quotation
// mark " indicates that the node is a shadow.
//
#ifndef RDF_GRAPH_H
#define RDF_GRAPH_H
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
// RDF uses uint32_t to refer to registers. This is to ensure that the type
@ -243,6 +248,7 @@
static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
namespace llvm {
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
@ -252,6 +258,7 @@ namespace llvm {
class TargetInstrInfo;
namespace rdf {
typedef uint32_t NodeId;
typedef uint32_t RegisterId;
@ -293,9 +300,11 @@ namespace rdf {
static uint16_t set_type(uint16_t A, uint16_t T) {
return (A & ~TypeMask) | T;
}
static uint16_t set_kind(uint16_t A, uint16_t K) {
return (A & ~KindMask) | K;
}
static uint16_t set_flags(uint16_t A, uint16_t F) {
return (A & ~FlagMask) | F;
}
@ -326,9 +335,14 @@ namespace rdf {
};
template <typename T> struct NodeAddr {
NodeAddr() : Addr(nullptr), Id(0) {}
NodeAddr() : Addr(nullptr) {}
NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
// Type cast (casting constructor). The reason for having this class
// instead of std::pair.
template <typename S> NodeAddr(const NodeAddr<S> &NA)
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
bool operator== (const NodeAddr<T> &NA) const {
assert((Addr == NA.Addr) == (Id == NA.Id));
return Addr == NA.Addr;
@ -336,13 +350,9 @@ namespace rdf {
bool operator!= (const NodeAddr<T> &NA) const {
return !operator==(NA);
}
// Type cast (casting constructor). The reason for having this class
// instead of std::pair.
template <typename S> NodeAddr(const NodeAddr<S> &NA)
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
T Addr;
NodeId Id;
NodeId Id = 0;
};
struct NodeBase;
@ -366,17 +376,20 @@ namespace rdf {
struct NodeAllocator {
// Amount of storage for a single node.
enum { NodeMemSize = 32 };
NodeAllocator(uint32_t NPB = 4096)
: NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
IndexMask((1 << BitsPerIndex)-1) {
assert(isPowerOf2_32(NPB));
}
NodeBase *ptr(NodeId N) const {
uint32_t N1 = N-1;
uint32_t BlockN = N1 >> BitsPerIndex;
uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
}
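// E.g., with the default 4096 nodes per block (BitsPerIndex == 12), id 1
// resolves to block 0 at offset 0, and id 4097 to block 1 at offset 0.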
NodeId id(const NodeBase *P) const;
NodeAddr<NodeBase*> New();
void clear();
@ -384,6 +397,7 @@ namespace rdf {
private:
void startNewBlock();
bool needNewBlock();
uint32_t makeId(uint32_t Block, uint32_t Index) const {
// Add 1 to the id, to avoid the id of 0, which is treated as "null".
return ((Block << BitsPerIndex) | Index) + 1;
@ -392,7 +406,7 @@ namespace rdf {
const uint32_t NodesPerBlock;
const uint32_t BitsPerIndex;
const uint32_t IndexMask;
char *ActiveEnd;
char *ActiveEnd = nullptr;
std::vector<char*> Blocks;
typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
AllocatorTy MemPool;
@ -405,6 +419,7 @@ namespace rdf {
RegisterRef() : RegisterRef(0) {}
explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
: Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
operator bool() const { return Reg != 0 && Mask.any(); }
bool operator== (const RegisterRef &RR) const {
return Reg == RR.Reg && Mask == RR.Mask;
@ -420,7 +435,8 @@ namespace rdf {
struct TargetOperandInfo {
TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
virtual ~TargetOperandInfo() {}
virtual ~TargetOperandInfo() = default;
virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@ -428,7 +444,6 @@ namespace rdf {
const TargetInstrInfo &TII;
};
// Packed register reference. Only used for storage.
struct PackedRegisterRef {
RegisterId Reg;
@ -442,11 +457,13 @@ namespace rdf {
template <typename T, unsigned N = 32>
struct IndexedSet {
IndexedSet() : Map() { Map.reserve(N); }
T get(uint32_t Idx) const {
// Index Idx corresponds to Map[Idx-1].
assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
return Map[Idx-1];
}
uint32_t insert(T Val) {
// Linear search.
auto F = llvm::find(Map, Val);
@ -455,11 +472,13 @@ namespace rdf {
Map.push_back(Val);
return Map.size(); // Return actual_index + 1.
}
uint32_t find(T Val) const {
auto F = llvm::find(Map, Val);
assert(F != Map.end());
return F - Map.begin();
}
private:
std::vector<T> Map;
};
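// Usage sketch: the indices handed out are 1-based, with 0 reserved as an
// invalid id.
//   IndexedSet<unsigned> S;
//   uint32_t Id = S.insert(42);   // returns 1 on an empty set
//   assert(S.get(Id) == 42);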
@ -478,12 +497,14 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}
PackedRegisterRef pack(RegisterRef RR) {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
PackedRegisterRef pack(RegisterRef RR) const {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
RegisterRef unpack(PackedRegisterRef PR) const {
return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
}
@ -491,11 +512,8 @@ namespace rdf {
struct RegisterAggr {
RegisterAggr(const TargetRegisterInfo &tri)
: Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
TRI(tri) {}
RegisterAggr(const RegisterAggr &RG)
: Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
: ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
RegisterAggr(const RegisterAggr &RG) = default;
bool empty() const { return Masks.empty(); }
bool hasAliasOf(RegisterRef RR) const;
@ -530,11 +548,11 @@ namespace rdf {
const TargetRegisterInfo &TRI;
};
struct NodeBase {
public:
// Make sure this is a POD.
NodeBase() = default;
uint16_t getType() const { return NodeAttrs::type(Attrs); }
uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@ -596,29 +614,36 @@ namespace rdf {
struct RefNode : public NodeBase {
RefNode() = default;
RegisterRef getRegRef(const DataFlowGraph &G) const;
MachineOperand &getOp() {
assert(!(getFlags() & NodeAttrs::PhiRef));
return *Ref.Op;
}
void setRegRef(RegisterRef RR, DataFlowGraph &G);
void setRegRef(MachineOperand *Op, DataFlowGraph &G);
NodeId getReachingDef() const {
return Ref.RD;
}
void setReachingDef(NodeId RD) {
Ref.RD = RD;
}
NodeId getSibling() const {
return Ref.Sib;
}
void setSibling(NodeId Sib) {
Ref.Sib = Sib;
}
bool isUse() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Use;
}
bool isDef() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Def;
@ -702,6 +727,7 @@ namespace rdf {
MachineBasicBlock *getCode() const {
return CodeNode::getCode<MachineBasicBlock*>();
}
void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
};
@ -709,6 +735,7 @@ namespace rdf {
MachineFunction *getCode() const {
return CodeNode::getCode<MachineFunction*>();
}
NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
const DataFlowGraph &G) const;
NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@ -723,6 +750,7 @@ namespace rdf {
template <typename T> T ptr(NodeId N) const {
return static_cast<T>(ptr(N));
}
NodeId id(const NodeBase *P) const;
template <typename T> NodeAddr<T> addr(NodeId N) const {
@ -738,13 +766,17 @@ namespace rdf {
struct DefStack {
DefStack() = default;
bool empty() const { return Stack.empty() || top() == bottom(); }
private:
typedef NodeAddr<DefNode*> value_type;
struct Iterator {
typedef DefStack::value_type value_type;
Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
value_type operator*() const {
assert(Pos >= 1);
return DS.Stack[Pos-1];
@ -755,14 +787,17 @@ namespace rdf {
}
bool operator==(const Iterator &It) const { return Pos == It.Pos; }
bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
private:
Iterator(const DefStack &S, bool Top);
// Pos-1 is the index in the StorageType object that corresponds to
// the top of the DefStack.
const DefStack &DS;
unsigned Pos;
friend struct DefStack;
};
public:
typedef Iterator iterator;
iterator top() const { return Iterator(*this, true); }
@ -773,14 +808,18 @@ namespace rdf {
void pop();
void start_block(NodeId N);
void clear_block(NodeId N);
private:
friend struct Iterator;
typedef std::vector<value_type> StorageType;
bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
return (P.Addr == nullptr) && (N == 0 || P.Id == N);
}
unsigned nextUp(unsigned P) const;
unsigned nextDown(unsigned P) const;
StorageType Stack;
};
@ -819,6 +858,7 @@ namespace rdf {
if (RemoveFromOwner)
removeFromOwner(UA);
}
void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
unlinkDefDF(DA);
if (RemoveFromOwner)
@ -831,23 +871,28 @@ namespace rdf {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == Kind;
}
template <uint16_t Kind>
static bool IsCode(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == Kind;
}
static bool IsDef(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Def;
}
static bool IsUse(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Use;
}
static bool IsPhi(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == NodeAttrs::Phi;
}
static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
uint16_t Flags = DA.Addr->getFlags();
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
@ -902,6 +947,7 @@ namespace rdf {
void unlinkUseDF(NodeAddr<UseNode*> UA);
void unlinkDefDF(NodeAddr<DefNode*> DA);
void removeFromOwner(NodeAddr<RefNode*> RA) {
NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
IA.Addr->removeMember(RA, *this);
@ -967,7 +1013,6 @@ namespace rdf {
return MM;
}
// Optionally print the lane mask, if it is not ~0.
struct PrintLaneMaskOpt {
PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@ -991,7 +1036,9 @@ namespace rdf {
PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
: Print<NodeAddr<T>>(x, g) {}
};
} // namespace rdf
} // namespace llvm
#endif // RDF_GRAPH_H
} // end namespace rdf
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H


@ -28,6 +28,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
SelectionDAGISel::getAnalysisUsage(AU);
}
void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF) {
MachineInstrBuilder MIB(MF, &MI);


@ -28,6 +28,8 @@ private:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);


@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
resideInSameSection(const Function *Caller, SDValue Callee,
const TargetMachine &TM) {
// If !G, Callee can be an external symbol.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G) return false;
const GlobalValue *GV = G->getGlobal();
if (GV->isDeclaration()) return false;
switch(GV->getLinkage()) {
default: llvm_unreachable("unknown linkage type");
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::ExternalWeakLinkage:
if (!G)
return false;
// Callee with weak linkage is allowed if it has hidden or protected
// visibility
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
if (GV->hasDefaultVisibility())
return false;
const GlobalValue *GV = G->getGlobal();
if (!GV->isStrongDefinitionForLinker())
return false;
case GlobalValue::ExternalLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
break;
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
GV->getSection() != Caller->getSection())
return false;
if (const auto *F = dyn_cast<Function>(GV)) {
if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
}
// With '-fPIC', calling a default-visibility function needs a 'nop' inserted
// after the call, no matter whether that function resides in the same module
// or not, so we treat it as being in a different module.
if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
// If the callee might be interposed, then we can't assume the ultimate call
// target will be in the same section. Even in cases where we can assume that
// interposition won't happen, in any case where the linker might insert a
// stub to allow for interposition, we must generate code as though
// interposition might occur. To understand why this matters, consider a
// situation where: a -> b -> c where the arrows indicate calls. b and c are
// in the same section, but a is in a different module (i.e. has a different
// TOC base pointer). If the linker allows for interposition between b and c,
// then it will generate a stub for the call edge between b and c which will
// save the TOC pointer into the designated stack slot allocated by b. If we
// return true here, and therefore allow a tail call between b and c, that
// stack slot won't exist and the b -> c stub will end up saving b's TOC base
// pointer into the stack slot allocated by a (where the a -> b stub saved
// a's TOC base pointer). If we're not considering a tail call, but rather,
// whether a nop is needed after the call instruction in b, because the linker
// will insert a stub, it might complain about a missing nop if we omit it
// (although many don't complain in this case).
if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
return false;
return true;
@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
!isa<ExternalSymbolSDNode>(Callee))
return false;
// Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
// (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in another
// module.
// Check if Callee resides in the same section, because for now, PPC64 SVR4
// ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
// another section.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
return CallOpc;
}
static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->isStrongDefinitionForLinker();
return false;
}
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
!isPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
// The address needs to go after the chain input but before the flag (or
// any other variadic arguments).
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
} else if (CallOpc == PPCISD::CALL &&
!resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);


@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitFunctionBody();
// Emit the XRay table for this function.
EmitXRayTable();
emitXRayTable();
// We didn't modify anything.
return false;


@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
: std::next(MBBI);
PI = skipDebugInstructionsBackward(PI, MBB.begin());
if (NI != nullptr)
NI = skipDebugInstructionsForward(NI, MBB.end());
unsigned Opc = PI->getOpcode();
int Offset = 0;
@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
if (Amount == 0)
@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If this is a callee-pop calling convention, emit a CFA adjust for
// the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
// Add Amount to SP to destroy a frame, or subtract to setup.
@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// Merge with any previous or following adjustment instruction. Note: the
// instructions merged with here do not have CFI, so their stack
// adjustments do not feed into CfaAdjustment.
StackAdjustment += mergeSPUpdates(MBB, I, true);
StackAdjustment += mergeSPUpdates(MBB, I, false);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
if (StackAdjustment) {
if (!(Fn->optForMinSize() &&
adjustStackWithPops(MBB, I, DL, StackAdjustment)))
BuildStackAdjustment(MBB, I, DL, StackAdjustment,
adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
/*InEpilogue=*/false);
}
}
@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: When not using precise CFA, we also need to adjust for the
// InternalAmt here.
if (CfaAdjustment) {
BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
nullptr, CfaAdjustment));
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr,
CfaAdjustment));
}
}


@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// [6] - ignore
// [7] - zero high half of destination
int MaskLO = Mask[0];
if (MaskLO == SM_SentinelUndef)
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
int MaskHI = Mask[2];
if (MaskHI == SM_SentinelUndef)
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
unsigned PermMask = MaskLO | (MaskHI << 4);
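// E.g., a WidenedMask of {0, 3} (low 128 bits from V1, high 128 bits from
// V2) yields PermMask 0x30 as the VPERM2X128 immediate.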
// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
if (MaskLO < 4)
if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI < 4)
if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
if (MaskLO >= 4)
if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI >= 4)
if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Ensure elements came from the same Op.
int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
if (WidenedMask[i] == SM_SentinelZero)
return SDValue();
if (WidenedMask[i] == SM_SentinelUndef)
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
assert(WidenedMask.size() == 4);
// See if this is an insertion of the lower 128 bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
unsigned OpIndex = (i < Size/2) ? 0 : 1;
// Make sure all V1 subvectors are in place.
if (WidenedMask[i] < 4) {
if (WidenedMask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128 bits.
if (V2Index >= 0 || WidenedMask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
}
// Form a 128-bit permutation.
// Convert the 64-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
unsigned PermMask = 0, Imm = 0;
unsigned ControlBitsNum = WidenedMask.size() / 2;
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
// Use first element in place of undef mask.
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
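The loop above packs one 2-bit selector per 128-bit result lane into the vshuf64x2/vshuf32x4 control byte, with the V1-vs-V2 choice carried separately via Ops[]. A standalone sketch of just that packing (plain C++; buildShuf128Imm is a hypothetical name):

#include <cassert>

// Hypothetical helper, for illustration. Undef lanes (< 0) use selector 0,
// matching the "use first element in place of undef" behavior.
unsigned buildShuf128Imm(const int WidenedMask[4]) {
  unsigned PermMask = 0;
  for (int i = 0; i < 4; ++i)
    if (WidenedMask[i] >= 0)
      PermMask |= (WidenedMask[i] % 4) << (i * 2);
  return PermMask;
}

int main() {
  // Reversing the four 128-bit lanes of one source: {3, 2, 1, 0} -> 0x1b.
  const int Rev[4] = {3, 2, 1, 0};
  assert(buildShuf128Imm(Rev) == 0x1b);
  return 0;
}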
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
++NumSentinelElements;
continue;
else if (M < NumElements)
++NumV1Elements;
else
@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK:
case INSERT_SUBVEC: {
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
else if (IntrData->Type == INSERT_SUBVEC) {
// imm should be adapted to ISD::INSERT_SUBVECTOR behavior
assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
Imm *= Src2.getSimpleValueType().getVectorNumElements();
Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
}
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
case ISD::INSERT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (VT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
// Op1 needs to be bitcasted to a smaller vector with the same element type.
SDValue Op1 = Op.getOperand(1);
MVT Op1VT = MVT::getVectorVT(EltVT,
Op1.getSimpleValueType().getSizeInBits() / EltSize);
Op1 = DAG.getBitcast(Op1VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
}
return false;
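The index arithmetic in this case follows from the insert position being measured in elements: rescaling by the ratio of element widths keeps the same byte offset across the bitcast. A worked standalone example (hypothetical helper, not LLVM code):

#include <cassert>
#include <cstdint>

// Hypothetical helper: rescale an element index after a bitcast changes the
// element width, preserving the byte offset of the inserted subvector.
uint64_t rescaleInsertIndex(uint64_t Imm, unsigned OpEltBits,
                            unsigned NewEltBits) {
  return (Imm * OpEltBits) / NewEltBits;
}

int main() {
  // Insert at element 4 of a v16i32; after bitcasting to v8i64 the same
  // byte offset is element (4 * 32) / 64 == 2.
  assert(rescaleInsertIndex(4, 32, 64) == 2);
  return 0;
}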
@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned Opcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
// TODO: Add extra cases where we can truncate both inputs for the
// cost of one (or none).
// e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
if (Op0 == Op1)
return true;
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
if (!N->isOnlyUserOf(Src.getNode()))
return SDValue();
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (Opcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
}
return SDValue();
}
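This combine relies on truncation commuting with these binary ops: the discarded high bits of a two's-complement AND/OR/XOR/ADD/MUL never feed into the retained low bits. A standalone sanity check of that identity in plain C++ (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcdef0ULL, Y = 0x0fedcba987654321ULL;
  // Bitwise ops and modular add/mul commute with truncation to 32 bits.
  assert((uint32_t)(X & Y) == ((uint32_t)X & (uint32_t)Y));
  assert((uint32_t)(X ^ Y) == ((uint32_t)X ^ (uint32_t)Y));
  assert((uint32_t)(X + Y) == (uint32_t)((uint32_t)X + (uint32_t)Y));
  assert((uint32_t)(X * Y) == (uint32_t)((uint32_t)X * (uint32_t)Y));
  return 0;
}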
/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;

View File

@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrk")
To.RC:$src0,
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;
// Intrinsic call with zero-masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrkz")
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;
// Intrinsic call without masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rr")
From.RC:$src1, imm:$idx)>;
}
// Codegen pattern for the alternative types
@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, EVEX, VEX_LIG,
defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, EVEX,
defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
load, "comiss">, PS, EVEX, VEX_LIG,
defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
load, "comisd">, PD, EVEX,
defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}

View File

@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr> {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))],
IIC_SSE_COMIS_RM>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, VEX, VEX_LIG;
@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, VEX;
defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, VEX;
defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
load, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
load, "comisd">, PD, VEX;
defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, VEX;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD;
defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD;
defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
"comiss">, PS;
defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
"comisd">, PD;
defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS;
defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD;
}
} // Defs = [EFLAGS]

View File

@ -1,4 +1,4 @@
//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
// X86 EVEX encoded instructions that have a VEX 128 encoding
// (table format: <EVEX opcode, VEX-128 opcode>).
static const X86EvexToVexCompressTableEntry
X86EvexToVex128CompressTable[] = {
static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
// EVEX scalar with corresponding VEX.
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
{ X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry
{ X86::VUCOMISDZrr , X86::VUCOMISDrr },
{ X86::VUCOMISSZrm , X86::VUCOMISSrm },
{ X86::VUCOMISSZrr , X86::VUCOMISSrr },
{ X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
{ X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
{ X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
{ X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
{ X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
{ X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
{ X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
{ X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
{ X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
{ X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
{ X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
{ X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
{ X86::VPEXTRBZmr , X86::VPEXTRBmr },
{ X86::VPEXTRBZrr , X86::VPEXTRBrr },
{ X86::VPEXTRDZmr , X86::VPEXTRDmr },
@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPEXTRQZrr , X86::VPEXTRQrr },
{ X86::VPEXTRWZmr , X86::VPEXTRWmr },
{ X86::VPEXTRWZrr , X86::VPEXTRWri },
{ X86::VPINSRBZrm , X86::VPINSRBrm },
{ X86::VPINSRBZrr , X86::VPINSRBrr },
{ X86::VPINSRDZrm , X86::VPINSRDrm },
@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ128rm , X86::VANDPDrm },
{ X86::VANDPDZ128rr , X86::VANDPDrr },
{ X86::VANDPSZ128rm , X86::VANDPSrm },
{ X86::VANDPSZ128rr , X86::VANDPSrr },
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
{ X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
{ X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
{ X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
{ X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
{ X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
{ X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
{ X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
{ X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
{ X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
{ X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
{ X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
{ X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
{ X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
{ X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
{ X86::VMULPDZ128rm , X86::VMULPDrm },
@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
{ X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
{ X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
{ X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
{ X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
{ X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
{ X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
{ X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
{ X86::VPERMILPDZ128ri , X86::VPERMILPDri },
@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
{ X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
{ X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
{ X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
{ X86::VPMULDQZ128rm , X86::VPMULDQrm },
{ X86::VPMULDQZ128rr , X86::VPMULDQrr },
{ X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
{ X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
{ X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
{ X86::VPSLLDQZ128rr , X86::VPSLLDQri },
{ X86::VPSLLDZ128ri , X86::VPSLLDri },
{ X86::VPSLLDZ128rm , X86::VPSLLDrm },
{ X86::VPSLLDZ128rr , X86::VPSLLDrr },
{ X86::VPSLLQZ128ri , X86::VPSLLQri },
{ X86::VPSLLQZ128rm , X86::VPSLLQrm },
{ X86::VPSLLQZ128rr , X86::VPSLLQrr },
@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry
// X86 EVEX encoded instructions that have a VEX 256 encoding
// (table format: <EVEX opcode, VEX-256 opcode>).
static const X86EvexToVexCompressTableEntry
X86EvexToVex256CompressTable[] = {
static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
{ X86::VADDPDZ256rm , X86::VADDPDYrm },
{ X86::VADDPDZ256rr , X86::VADDPDYrr },
{ X86::VADDPSZ256rm , X86::VADDPSYrm },
@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ256rr , X86::VANDPDYrr },
{ X86::VANDPSZ256rm , X86::VANDPSYrm },
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
{ X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VDIVPDZ256rr , X86::VDIVPDYrr },
{ X86::VDIVPSZ256rm , X86::VDIVPSYrm },
{ X86::VDIVPSZ256rr , X86::VDIVPSYrr },
{ X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
{ X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
{ X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
{ X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
{ X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
{ X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
{ X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
{ X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
{ X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
{ X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
{ X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
{ X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
{ X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
{ X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
{ X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
{ X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
{ X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
{ X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
{ X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
{ X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
{ X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
{ X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
{ X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
{ X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
{ X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
{ X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
{ X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
{ X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
{ X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
{ X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
{ X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPAVGBZ256rr , X86::VPAVGBYrr },
{ X86::VPAVGWZ256rm , X86::VPAVGWYrm },
{ X86::VPAVGWZ256rr , X86::VPAVGWYrr },
{ X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
{ X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
{ X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
{ X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
{ X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
{ X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
{ X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
{ X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
{ X86::VPERMDZ256rm , X86::VPERMDYrm },
{ X86::VPERMDZ256rr , X86::VPERMDYrr },
{ X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
{ X86::VPSLLDZ256ri , X86::VPSLLDYri },
{ X86::VPSLLDZ256rm , X86::VPSLLDYrm },
{ X86::VPSLLDZ256rr , X86::VPSLLDYrr },
{ X86::VPSLLQZ256ri , X86::VPSLLQYri },
{ X86::VPSLLQZ256rm , X86::VPSLLQYrm },
{ X86::VPSLLQZ256rr , X86::VPSLLQYrr },
@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
{ X86::VPSLLWZ256ri , X86::VPSLLWYri },
{ X86::VPSLLWZ256rm , X86::VPSLLWYrm },
{ X86::VPSLLWZ256rr , X86::VPSLLWYrr },
{ X86::VPSRADZ256ri , X86::VPSRADYri },
{ X86::VPSRADZ256rm , X86::VPSRADYrm },
{ X86::VPSRADZ256rr , X86::VPSRADYrr },
@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
{ X86::VPSRLDZ256ri , X86::VPSRLDYri },
{ X86::VPSRLDZ256rm , X86::VPSRLDYrm },
{ X86::VPSRLDZ256rr , X86::VPSRLDYrr },
{ X86::VPSRLQZ256ri , X86::VPSRLQYri },
{ X86::VPSRLQZ256rm , X86::VPSRLQYrm },
{ X86::VPSRLQZ256rr , X86::VPSRLQYrr },
@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry
{ X86::VXORPSZ256rr , X86::VXORPSYrr },
};
#endif
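For context: the pass that consumes these tables (outside this hunk) maps an EVEX-encoded instruction to its paired VEX replacement by searching for the EVEX opcode. A hypothetical sketch of such a lookup, with placeholder opcode numbers standing in for the X86:: enumerators:

#include <cassert>

struct CompressEntry { unsigned EvexOpc, VexOpc; };

// Placeholder opcode numbers; the real tables above use X86:: enumerators.
static const CompressEntry Table[] = { {100, 10}, {200, 20}, {300, 30} };

// Return the VEX replacement opcode, or 0 if none exists.
unsigned lookupVexOpcode(unsigned EvexOpc) {
  for (const CompressEntry &E : Table)
    if (E.EvexOpc == EvexOpc)
      return E.VexOpc;
  return 0;
}

int main() {
  assert(lookupVexOpcode(200) == 20);
  assert(lookupVexOpcode(999) == 0);
  return 0;
}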

View File

@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, INSERT_SUBVEC,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,

View File

@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo
OutStreamer->EmitInstruction(TC, getSubtargetInfo());
}
void X86AsmPrinter::EmitXRayTable() {
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since EmitXRayTable() is always called just
// before the function's end, we assume that this is happening after the
// last return instruction.
//
// We then align the reference to 16 byte boundaries, which we determined
// experimentally to be beneficial to avoid causing decoder stalls.
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, 8, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(14);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator

View File

@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (Kind == TTI::SK_Reverse) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb
};
if (ST->hasVBMI())
if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
{ ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4
};
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd
{ ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
{ ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq
{ ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd
{ ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps
{ ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq
{ ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb
};
if (ST->hasAVX2())
if (const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
{ ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
{ ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb
// + vinsertf128
};
if (ST->hasAVX())
if (const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
{ ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb
};
if (ST->hasSSSE3())
if (const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
{ ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
};
if (ST->hasSSE2())
if (const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
};
if (ST->hasSSE1())
if (const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
} else if (Kind == TTI::SK_Alternate) {
if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
if (ST->hasAVX2() && LT.second == MVT::v16i16)
return LT.first;
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
{ TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
};
static const CostTblEntry AVXAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
if (ST->hasVBMI())
if (const auto *Entry =
CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4
};
// This shuffle is custom lowered into a sequence of:
// 2x vextractf128 , 2x vpblendw , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
if (ST->hasBWI())
if (const auto *Entry =
CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
// This shuffle is custom lowered into a long sequence of:
// 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
static const CostTblEntry AVX512ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
{ TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
{ TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
{ TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
{ TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
{ TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
{ TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
{ TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
};
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
{ TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
{ TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
{ TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
};
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE41AltShuffleTbl[] = {
// These are lowered into movsd.
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
// packed float vectors with four elements are lowered into BLENDI dag
// nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
// This shuffle generates a single pshufw.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
// There is no instruction that matches a v16i8 alternate shuffle.
// The backend will expand it into the sequence 'pshufb + pshufb + or'.
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
static const CostTblEntry SSE41ShuffleTbl[] = {
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
{ TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
};
if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
LT.second))
if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3AltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
static const CostTblEntry SSSE3ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
// SSE3 doesn't have 'blendps'. The following shuffles are expanded into
// the sequence 'shufps + pshufd'
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
{ TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
};
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSEAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
static const CostTblEntry SSE2ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
{ TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
{ TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
{ TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
// This is expanded into a long sequence of four extract + four insert.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
// 8 x (pinsrw + pextrw + and + movb + movzb + or)
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
{ TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
};
// Fall-back (SSE3 and SSE2).
if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
{ TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
};
if (ST->hasSSE1())
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
} else if (Kind == TTI::SK_PermuteTwoSrc) {
// We assume that source and destination have the same vector type.

View File

@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// add(zext(xor i16 X, -32768), -32768) --> sext X
return CastInst::Create(Instruction::SExt, X, LHS->getType());
}
if (Val->isNegative() &&
match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
Val->sge(-C->sext(Val->getBitWidth()))) {
// (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
return CastInst::Create(
Instruction::ZExt,
Builder->CreateNUWAdd(
X, Constant::getIntegerValue(X->getType(),
*C + Val->trunc(C->getBitWidth()))),
I.getType());
}
}
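The new fold is sound because a negative Val no larger in magnitude than C can be folded into the nuw add without wrapping the narrow type. A standalone check with concrete numbers (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 100, C = 50;   // add nuw X, C: 150 fits in i8, no wrap
  int32_t Val = -40;         // negative, and Val >= -C
  int32_t LHS = (int32_t)(uint8_t)(X + C) + Val;  // add(zext(add nuw X, C), Val)
  uint8_t NewC = (uint8_t)(C + (uint8_t)Val);     // C + trunc(Val) == 10
  int32_t RHS = (int32_t)(uint8_t)(X + NewC);     // zext(add nuw X, C + Val)
  assert(LHS == RHS && LHS == 110);
  return 0;
}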
// FIXME: Use the match above instead of dyn_cast to allow these transforms

View File

@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
}
case Intrinsic::fma:
case Intrinsic::fmuladd: {
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
// Canonicalize constants into the RHS.
if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
II->setArgOperand(0, Src1);
II->setArgOperand(1, Src0);
std::swap(Src0, Src1);
}
Value *LHS = nullptr;
Value *RHS = nullptr;
// fma fneg(x), fneg(y), z -> fma x, y, z
if (match(Src0, m_FNeg(m_Value(LHS))) &&
match(Src1, m_FNeg(m_Value(RHS)))) {
CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
{LHS, RHS, II->getArgOperand(2)});
NewCall->takeName(II);
NewCall->copyFastMathFlags(II);
return replaceInstUsesWith(*II, NewCall);
}
// fma fabs(x), fabs(x), z -> fma x, x, z
if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
{LHS, LHS, II->getArgOperand(2)});
NewCall->takeName(II);
NewCall->copyFastMathFlags(II);
return replaceInstUsesWith(*II, NewCall);
}
// fma x, 1, z -> fadd x, z
if (match(Src1, m_FPOne())) {
Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
RI->copyFastMathFlags(II);
return RI;
}
break;
}
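Both fma rewrites above rest on exact IEEE identities: the infinitely precise product (-x)*(-y) equals x*y, and a unit factor reduces fma to a single correctly rounded add. A standalone check via the C library fma, under the assumption that the same identities motivate the IR combines:

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.125;
  assert(std::fma(-x, -y, z) == std::fma(x, y, z));  // fma fneg(x), fneg(y), z
  assert(std::fma(x, 1.0, z) == x + z);              // fma x, 1, z -> fadd x, z
  return 0;
}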
case Intrinsic::fabs: {
Value *Cond;
Constant *LHS, *RHS;
if (match(II->getArgOperand(0),
m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
return SelectInst::Create(Cond, Call0, Call1);
}
break;
}
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
Value *LHS = ICmp->getOperand(0);
Value *RHS = ICmp->getOperand(1);
if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
isa<LoadInst>(LHS) &&
isa<Constant>(RHS) &&
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
if (isValidAssumeForContext(II, LI, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return eraseInstFromFunction(*II);
}
}
CmpInst::Predicate Pred;
Instruction *LHS;
if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
LHS->getType()->isPointerTy() &&
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
return eraseInstFromFunction(*II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
APInt KnownZero(1, 0), KnownOne(1, 0);

View File

@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI(LI);
bool IsLoadCSE = false;
if (Value *AvailableVal =
FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE) {
LoadInst *NLI = cast<LoadInst>(AvailableVal);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null};
combineMetadata(NLI, &LI, KnownIDs);
};
if (Value *AvailableVal = FindAvailableLoadedValue(
&LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
return replaceInstUsesWith(
LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),

View File

@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned ShAmt = Op1C->getZExtValue();
// Turn:
// %zext = zext i32 %V to i64
// %res = shl i64 %zext, 8
//
// Into:
// %shl = shl i32 %V, 8
// %res = zext i32 %shl to i64
//
// This is only valid if %V would have zeros shifted out.
if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) {
unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits();
if (ShAmt < SrcBitWidth &&
MaskedValueIsZero(ZI->getOperand(0),
APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) {
auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt);
return new ZExtInst(Shl, I.getType());
}
}
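The MaskedValueIsZero guard is what makes swapping the shl and the zext legal: the transform holds exactly when the bits shifted out of the narrow type are already zero. A standalone illustration (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t V = 0x00ABCDEF;  // top 8 bits known zero: transform applies
  assert(((uint64_t)V << 8) == (uint64_t)(V << 8));
  uint32_t W = 0xFFABCDEF;  // high bits set: the two forms differ
  assert(((uint64_t)W << 8) != (uint64_t)(W << 8));
  return 0;
}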
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
MaskedValueIsZero(I.getOperand(0),

View File

@ -481,9 +481,9 @@ private:
bool processNode(DomTreeNode *Node);
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI;
else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),

View File

@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
//===----------------------------------------------------------------------===//
// GVN Pass
@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
// If this store's memorydef stores the same value as the last store, the
// memory accesses are equivalent.
// Get the expression, if any, for the RHS of the MemoryDef.
MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
// See if this store expression already has a value, and it's the same as our
// current store. FIXME: Right now, we only do this for simple stores.
// See if we are defined by a previous store expression, it already has a
// value, and it's the same value as our current store. FIXME: Right now, we
// only do this for simple stores, we should expand to cover memcpys, etc.
if (SI->isSimple()) {
// Get the expression, if any, for the RHS of the MemoryDef.
MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
// If this is a MemoryDef, we need to update the equivalence table. If
// we
// determined the expression is congruent to a different memory state,
// use that different memory state. If we determined it didn't, we
// update
// that as well. Note that currently, we do not guarantee the
// "different" memory state dominates us. The goal is to make things
// that are congruent look congruent, not ensure we can eliminate one in
// favor of the other.
// Right now, the only way they can be equivalent is for store
// expresions.
if (!isa<MemoryUse>(MA)) {
if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) {
auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
} else {
setMemoryAccessEquivTo(MA, nullptr);
}
// we determined the expression is congruent to a different memory
// state, use that different memory state. If we determined it didn't,
// we update that as well. Right now, we only support store
// expressions.
if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
EClass->Members.size() != 1) {
auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
} else {
setMemoryAccessEquivTo(MA, nullptr);
}
markMemoryUsersTouched(MA);
}
@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
} else {
// Handle terminators that return values. All of them produce values we
// don't currently understand.
if (!I->getType()->isVoidTy()){
if (!I->getType()->isVoidTy()) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
}
@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() {
continue;
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
if (FirstMUD && SecondMUD) {
auto *FirstInst = FirstMUD->getMemoryInst();
auto *SecondInst = SecondMUD->getMemoryInst();
if (FirstMUD && SecondMUD)
assert(
ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) &&
ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
ValueToClass.lookup(SecondMUD->getMemoryInst()) &&
"The instructions for these memory operations should have been in "
"the same congruence class");
}
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
// We can only sanely verify that MemoryDefs in the operand list all have
@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
initializeCongruenceClasses(F);
unsigned int Iterations = 0;
// We start out in the entry block.
BasicBlock *LastBlock = &F.getEntryBlock();
while (TouchedInstructions.any()) {
++Iterations;
// Walk through all the instructions in all the blocks in RPO.
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
InstrNum = TouchedInstructions.find_next(InstrNum)) {
@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
TouchedInstructions.reset(InstrNum);
}
}
// FIXME: Move this to expensive checks when we are satisfied with NewGVN
NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
#ifndef NDEBUG
verifyMemoryCongruency();
#endif
@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Cleanup the congruence class.
SmallPtrSet<Value *, 4> MembersLeft;
for (Value * Member : CC->Members) {
for (Value *Member : CC->Members) {
if (Member->getType()->isVoidTy()) {
MembersLeft.insert(Member);
continue;

View File

@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have

View File

@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
uint64_t TrueWeight, FalseWeight;
uint64_t ExitWeight = 0, BackEdgeWeight = 0;
uint64_t ExitWeight = 0, CurHeaderWeight = 0;
if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
// The # of times the loop body executes is the sum of the exit block
// weight and the # of times the backedges are taken.
CurHeaderWeight = TrueWeight + FalseWeight;
}
// For each peeled-off iteration, make a copy of the loop.
@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
// The exit weight of the previous iteration is the header entry weight
// of the current iteration. So this is exactly how many dynamic iterations
// the current peeled-off static iteration uses up.
// Subtract the exit weight from the current header weight -- the exit
// weight is exactly the weight of the previous iteration's header.
// FIXME: due to the way the distribution is constructed, we need a
// guard here to make sure we don't end up with non-positive weights.
if (ExitWeight < BackEdgeWeight)
BackEdgeWeight -= ExitWeight;
if (ExitWeight < CurHeaderWeight)
CurHeaderWeight -= ExitWeight;
else
BackEdgeWeight = 1;
CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
NewBlocks, LoopBlocks, VMap, LVMap, LI);
@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// Adjust the branch weights on the loop exit.
if (ExitWeight) {
// The backedge count is the difference of current header weight and
// current loop exit weight. If the current header weight is smaller than
// the current loop exit weight, we mark the loop backedge weight as 1.
uint64_t BackEdgeWeight = 0;
if (ExitWeight < CurHeaderWeight)
BackEdgeWeight = CurHeaderWeight - ExitWeight;
else
BackEdgeWeight = 1;
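This bookkeeping pairs with the per-iteration subtraction earlier in the function: each peeled copy retires ExitWeight from the header weight, and the residual loop's back edge gets whatever remains beyond the exit weight. A worked example with illustrative profile numbers (plain C++, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t ExitWeight = 10, CurHeaderWeight = 1000;
  // Two peeled iterations each retire one exit weight's worth of profile
  // weight (mirroring the subtraction in the peeling loop).
  for (unsigned Iter = 0; Iter < 2; ++Iter)
    CurHeaderWeight =
        ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  // The residual loop's back edge gets what remains beyond the exit weight.
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  std::printf("header=%llu backedge=%llu\n",
              (unsigned long long)CurHeaderWeight,
              (unsigned long long)BackEdgeWeight);  // header=980 backedge=970
  return 0;
}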
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)

View File

@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->getOperandUse(O).set(NewOperands[O]);
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
// Update metadata and IR flags.
// The debug location for the "common" instruction is the merged location of
// all the commoned instructions. We start with the original location of the
// "common" instruction and iteratively merge each location in the loop below.
DILocation *Loc = I0->getDebugLoc();
// Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
if (I != I0) {
Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
combineMetadataForCSE(I0, I);
I0->andIRFlags(I);
}
if (!isa<CallInst>(I0))
I0->setDebugLoc(Loc);
if (!isa<StoreInst>(I0)) {
// canSinkLastInstruction checked that all instructions were used by

View File

@ -2366,7 +2366,11 @@ enum CXCursorKind {
*/
CXCursor_OMPTargetTeamsDistributeParallelForDirective = 277,
CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForDirective,
/** \brief OpenMP target teams distribute parallel for simd directive.
*/
CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective = 278,
CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective,
/**
* \brief Cursor that represents the translation unit itself.

View File

@ -2669,6 +2669,9 @@ DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeDirective,
DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForSimdDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
// OpenMP clauses.
template <typename Derived>
bool RecursiveASTVisitor<Derived>::TraverseOMPClause(OMPClause *C) {

View File

@ -3638,6 +3638,79 @@ public:
}
};
/// This represents '#pragma omp target teams distribute parallel for simd'
/// combined directive.
///
/// \code
/// #pragma omp target teams distribute parallel for simd private(x)
/// \endcode
/// In this example directive '#pragma omp target teams distribute parallel
/// for simd' has clause 'private' with the variables 'x'
///
class OMPTargetTeamsDistributeParallelForSimdDirective final
: public OMPLoopDirective {
friend class ASTStmtReader;
/// Build directive with the given start and end location.
///
/// \param StartLoc Starting location of the directive kind.
/// \param EndLoc Ending location of the directive.
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
OMPTargetTeamsDistributeParallelForSimdDirective(SourceLocation StartLoc,
SourceLocation EndLoc,
unsigned CollapsedNum,
unsigned NumClauses)
: OMPLoopDirective(this,
OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
OMPD_target_teams_distribute_parallel_for_simd,
StartLoc, EndLoc, CollapsedNum, NumClauses) {}
/// Build an empty directive.
///
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
explicit OMPTargetTeamsDistributeParallelForSimdDirective(
unsigned CollapsedNum, unsigned NumClauses)
: OMPLoopDirective(
this, OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
OMPD_target_teams_distribute_parallel_for_simd, SourceLocation(),
SourceLocation(), CollapsedNum, NumClauses) {}
public:
/// Creates directive with a list of \a Clauses.
///
/// \param C AST context.
/// \param StartLoc Starting location of the directive kind.
/// \param EndLoc Ending Location of the directive.
/// \param CollapsedNum Number of collapsed loops.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
///
static OMPTargetTeamsDistributeParallelForSimdDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
/// \param C AST context.
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
static OMPTargetTeamsDistributeParallelForSimdDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
static bool classof(const Stmt *T) {
return T->getStmtClass() ==
OMPTargetTeamsDistributeParallelForSimdDirectiveClass;
}
};
} // end namespace clang
#endif
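For reference, a minimal source-level use of the combined construct this class models; the file name and the -fopenmp invocation are assumptions, and any OpenMP 4.5-capable compiler should accept it:

#include <cstdio>

int main() {
  int x = 0;
  double a[128];
  // Offload to a device, create a league of teams, distribute the loop
  // across teams, parallelize within each team, and vectorize with simd;
  // 'private(x)' gives each iteration its own copy of 'x'.
  #pragma omp target teams distribute parallel for simd private(x)
  for (int i = 0; i < 128; ++i) {
    x = i % 7;
    a[i] = x * 2.0;
  }
  std::printf("%f\n", a[127]); // 2.0: the last iteration stored x == 1
  return 0;
}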

View File

@@ -3921,6 +3921,8 @@ def ext_ms_deref_template_argument: ExtWarn<
def ext_ms_delayed_template_argument: ExtWarn<
"using the undeclared type %0 as a default template argument is a "
"Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_template_arg_deduced_incomplete_pack : Error<
"deduced incomplete pack %0 for template parameter %1">;
// C++ template specialization
def err_template_spec_unknown_kind : Error<

View File

@@ -162,6 +162,9 @@
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name)
#endif
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name)
#endif
// OpenMP directives.
OPENMP_DIRECTIVE(threadprivate)
@@ -214,6 +217,7 @@ OPENMP_DIRECTIVE_EXT(teams_distribute_parallel_for, "teams distribute parallel f
OPENMP_DIRECTIVE_EXT(target_teams, "target teams")
OPENMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distribute parallel for")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd")
// OpenMP clauses.
OPENMP_CLAUSE(if, OMPIfClause)
@@ -793,6 +797,33 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
// Clauses allowed for OpenMP directive
// 'target teams distribute parallel for simd'.
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(device)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(map)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(nowait)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(depend)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(defaultmap)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(is_device_ptr)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_teams)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(thread_limit)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen)
#undef OPENMP_TASKLOOP_SIMD_CLAUSE
#undef OPENMP_TASKLOOP_CLAUSE
#undef OPENMP_LINEAR_KIND
@@ -843,3 +874,4 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
#undef OPENMP_TARGET_TEAMS_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE
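The lists above follow the .def X-macro convention: an includer defines OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name), includes OpenMPKinds.def, and receives one expansion per allowed clause (isAllowedClauseForDirective further down is generated exactly this way). A self-contained sketch of the same pattern, with hypothetical names:

#include <cstdio>

// In LLVM the list lives in a separate .def file that each includer pulls in
// after defining the macro; here it is itself a macro so the sketch stays in
// one file.
#define FOR_EACH_SIMD_CLAUSE(CLAUSE) \
  CLAUSE(if)                         \
  CLAUSE(private)                    \
  CLAUSE(simdlen)

// One expansion: a table of the allowed clause names.
#define AS_STRING(Name) #Name,
static const char *const AllowedClauses[] = {FOR_EACH_SIMD_CLAUSE(AS_STRING)};
#undef AS_STRING

int main() {
  for (const char *Clause : AllowedClauses)
    std::puts(Clause);
  return 0;
}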

View File

@@ -243,3 +243,4 @@ def OMPTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDirective : DStmt<OMPExecutableDirective>;
def OMPTargetTeamsDistributeDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForSimdDirective : DStmt<OMPLoopDirective>;

View File

@@ -882,7 +882,7 @@ def fms_compatibility_version
"(default))">;
def fdelayed_template_parsing : Flag<["-"], "fdelayed-template-parsing">, Group<f_Group>,
HelpText<"Parse templated function definitions at the end of the "
"translation unit">, Flags<[CC1Option]>;
"translation unit">, Flags<[CC1Option, CoreOption]>;
def fms_memptr_rep_EQ : Joined<["-"], "fms-memptr-rep=">, Group<f_Group>, Flags<[CC1Option]>;
def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>,
Flags<[DriverOption, CC1Option]>, MetaVarName<"<directory>">,
@@ -1031,7 +1031,8 @@ def fno_ms_extensions : Flag<["-"], "fno-ms-extensions">, Group<f_Group>,
Flags<[CoreOption]>;
def fno_ms_compatibility : Flag<["-"], "fno-ms-compatibility">, Group<f_Group>,
Flags<[CoreOption]>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>,
Flags<[DriverOption, CoreOption]>;
def fno_objc_exceptions: Flag<["-"], "fno-objc-exceptions">, Group<f_Group>;
def fno_objc_legacy_dispatch : Flag<["-"], "fno-objc-legacy-dispatch">, Group<f_Group>;
def fno_objc_weak : Flag<["-"], "fno-objc-weak">, Group<f_Group>, Flags<[CC1Option]>;
@@ -1331,6 +1332,12 @@ def funique_section_names : Flag <["-"], "funique-section-names">,
def fno_unique_section_names : Flag <["-"], "fno-unique-section-names">,
Group<f_Group>, Flags<[CC1Option]>;
def fstrict_return : Flag<["-"], "fstrict-return">, Group<f_Group>,
Flags<[CC1Option]>,
HelpText<"Always treat control flow paths that fall off the end of a non-void"
"function as unreachable">;
def fno_strict_return : Flag<["-"], "fno-strict-return">, Group<f_Group>,
Flags<[CC1Option]>;
def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>,
Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;

View File

@@ -251,6 +251,10 @@ CODEGENOPT(DiagnosticsWithHotness, 1, 0)
/// Whether copy relocations support is available when building as PIE.
CODEGENOPT(PIECopyRelocations, 1, 0)
/// Whether we should use the undefined behavior optimization for control flow
/// paths that reach the end of a function without executing a required return.
CODEGENOPT(StrictReturn, 1, 1)
#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT

View File

@@ -8514,6 +8514,12 @@ public:
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
/// Called on well-formed '\#pragma omp target teams distribute parallel for
/// simd' after parsing of the associated statement.
StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
/// Checks correctness of linear modifiers.
bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,

View File

@@ -1516,6 +1516,7 @@ namespace clang {
STMT_OMP_TARGET_TEAMS_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE,
EXPR_OMP_ARRAY_SECTION,
// ARC

View File

@@ -0,0 +1,100 @@
//===--- Diagnostic.h - Framework for clang diagnostics tools --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// Structures supporting diagnostics and refactorings that span multiple
// translation units. They indicate diagnostic reports and replacement
// suggestions for the analyzed sources.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
#define LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
#include "Replacement.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <string>
namespace clang {
namespace tooling {
/// \brief Represents a diagnostic message together with information on the
/// location of the problem.
struct DiagnosticMessage {
DiagnosticMessage(llvm::StringRef Message = "");
/// \brief Constructs a diagnostic message with an offset to the diagnostic
/// within the file where the problem occurred.
///
/// \param Loc Should be a file location, it is not meaningful for a macro
/// location.
///
DiagnosticMessage(llvm::StringRef Message, const SourceManager &Sources,
SourceLocation Loc);
std::string Message;
std::string FilePath;
unsigned FileOffset;
};
/// \brief Represents the diagnostic with the level of severity and possible
/// fixes to be applied.
struct Diagnostic {
enum Level {
Warning = DiagnosticsEngine::Warning,
Error = DiagnosticsEngine::Error
};
Diagnostic() = default;
Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
StringRef BuildDirectory);
Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
llvm::StringMap<Replacements> &Fix,
SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
llvm::StringRef BuildDirectory);
/// \brief Name identifying the Diagnostic.
std::string DiagnosticName;
/// \brief Message associated with the diagnostic.
DiagnosticMessage Message;
/// \brief Fixes to apply, grouped by file path.
llvm::StringMap<Replacements> Fix;
/// \brief Potential notes about the diagnostic.
SmallVector<DiagnosticMessage, 1> Notes;
/// \brief Diagnostic level. Can indicate either an error or a warning.
Level DiagLevel;
/// \brief A build directory of the diagnostic source file.
///
/// It's an absolute path, which is the `directory` field of the source file
/// in the compilation database. If users don't specify the compilation
/// database directory, it is the current directory where clang-tidy runs.
///
/// Note: it is empty in unittests.
std::string BuildDirectory;
};
/// \brief Collection of Diagnostics generated from a single translation unit.
struct TranslationUnitDiagnostics {
/// Name of the main source for the translation unit.
std::string MainSourceFile;
std::vector<Diagnostic> Diagnostics;
};
} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H

View File

@@ -329,12 +329,6 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
struct TranslationUnitReplacements {
/// Name of the main source for the translation unit.
std::string MainSourceFile;
/// A freeform chunk of text to describe the context of the replacements.
/// Will be printed, for example, when detecting conflicts during replacement
/// deduplication.
std::string Context;
std::vector<Replacement> Replacements;
};

View File

@@ -0,0 +1,101 @@
//===-- DiagnosticsYaml.h -- Serialization for Diagnostics -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the structure of a YAML document for serializing
/// diagnostics.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
#define LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
#include "clang/Tooling/Core/Diagnostic.h"
#include "clang/Tooling/ReplacementsYaml.h"
#include "llvm/Support/YAMLTraits.h"
#include <string>
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::Diagnostic)
namespace llvm {
namespace yaml {
template <> struct MappingTraits<clang::tooling::Diagnostic> {
/// \brief Helper to (de)serialize a Diagnostic since we don't have direct
/// access to its data members.
class NormalizedDiagnostic {
public:
NormalizedDiagnostic(const IO &)
: DiagLevel(clang::tooling::Diagnostic::Level::Warning) {}
NormalizedDiagnostic(const IO &, const clang::tooling::Diagnostic &D)
: DiagnosticName(D.DiagnosticName), Message(D.Message), Fix(D.Fix),
Notes(D.Notes), DiagLevel(D.DiagLevel),
BuildDirectory(D.BuildDirectory) {}
clang::tooling::Diagnostic denormalize(const IO &) {
return clang::tooling::Diagnostic(DiagnosticName, Message, Fix, Notes,
DiagLevel, BuildDirectory);
}
std::string DiagnosticName;
clang::tooling::DiagnosticMessage Message;
llvm::StringMap<clang::tooling::Replacements> Fix;
SmallVector<clang::tooling::DiagnosticMessage, 1> Notes;
clang::tooling::Diagnostic::Level DiagLevel;
std::string BuildDirectory;
};
static void mapping(IO &Io, clang::tooling::Diagnostic &D) {
MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
Io, D);
Io.mapRequired("DiagnosticName", Keys->DiagnosticName);
// FIXME: Export properly all the different fields.
std::vector<clang::tooling::Replacement> Fixes;
for (auto &Replacements : Keys->Fix) {
for (auto &Replacement : Replacements.second) {
Fixes.push_back(Replacement);
}
}
Io.mapRequired("Replacements", Fixes);
for (auto &Fix : Fixes) {
llvm::Error Err = Keys->Fix[Fix.getFilePath()].add(Fix);
if (Err) {
// FIXME: Implement better conflict handling.
llvm::errs() << "Fix conflicts with existing fix: "
<< llvm::toString(std::move(Err)) << "\n";
}
}
}
};
/// \brief Specialized MappingTraits to describe how a
/// TranslationUnitDiagnostics is (de)serialized.
template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
std::vector<clang::tooling::Diagnostic> Diagnostics;
for (auto &Diagnostic : Doc.Diagnostics) {
// FIXME: Export all diagnostics, not just the ones with fixes.
// Update MappingTraits<clang::tooling::Diagnostic>::mapping.
if (Diagnostic.Fix.size() > 0) {
Diagnostics.push_back(Diagnostic);
}
}
Io.mapRequired("Diagnostics", Diagnostics);
Doc.Diagnostics = Diagnostics;
}
};
} // end namespace yaml
} // end namespace llvm
#endif // LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
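A sketch of how these traits are driven; the check name, file name, and offsets below are invented for illustration:

#include "clang/Tooling/Core/Diagnostic.h"
#include "clang/Tooling/DiagnosticsYaml.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  using namespace clang::tooling;
  DiagnosticMessage Msg("use nullptr");
  llvm::StringMap<Replacements> Fix;
  // Hypothetical fix: replace 2 characters at offset 10 of input.cpp.
  llvm::consumeError(
      Fix["input.cpp"].add(Replacement("input.cpp", 10, 2, "nullptr")));
  llvm::SmallVector<DiagnosticMessage, 1> Notes;
  Diagnostic D("modernize-use-nullptr", Msg, Fix, Notes, Diagnostic::Warning,
               /*BuildDirectory=*/"");

  TranslationUnitDiagnostics TUD;
  TUD.MainSourceFile = "input.cpp";
  TUD.Diagnostics.push_back(D);

  // Emits MainSourceFile plus, per diagnostic, DiagnosticName and the
  // flattened Replacements, as defined by the mapping above.
  llvm::yaml::Output YAML(llvm::outs());
  YAML << TUD;
  return 0;
}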

View File

@@ -65,7 +65,6 @@ template <> struct MappingTraits<clang::tooling::TranslationUnitReplacements> {
static void mapping(IO &Io,
clang::tooling::TranslationUnitReplacements &Doc) {
Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
Io.mapOptional("Context", Doc.Context, std::string());
Io.mapRequired("Replacements", Doc.Replacements);
}
};

View File

@@ -7192,6 +7192,12 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
CharUnits &EndOffset) {
bool DetermineForCompleteObject = refersToCompleteObject(LVal);
auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
return false;
return HandleSizeof(Info, ExprLoc, Ty, Result);
};
// We want to evaluate the size of the entire object. This is a valid fallback
// for when Type=1 and the designator is invalid, because we're asked for an
// upper-bound.
@@ -7209,7 +7215,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
return false;
QualType BaseTy = getObjectType(LVal.getLValueBase());
return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset);
return CheckedHandleSizeof(BaseTy, EndOffset);
}
// We want to evaluate the size of a subobject.
@@ -7238,7 +7244,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
}
CharUnits BytesPerElem;
if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem))
if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem))
return false;
// According to the GCC documentation, we want the size of the subobject
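The new guard matters for __builtin_object_size queries where the object's type has no size; a minimal sketch of such an input (hypothetical):

#include <cstdio>

struct Incomplete; // declared, never defined
Incomplete *P = nullptr;

int main() {
  // *P has no known size, so constant evaluation must bail out instead of
  // calling HandleSizeof on an incomplete type; the builtin then yields its
  // "unknown" result, (size_t)-1 for type 1.
  std::printf("%zu\n", __builtin_object_size(P, 1));
  return 0;
}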

View File

@@ -1659,3 +1659,64 @@ OMPTargetTeamsDistributeParallelForDirective::CreateEmpty(const ASTContext &C,
return new (Mem)
OMPTargetTeamsDistributeParallelForDirective(CollapsedNum, NumClauses);
}
OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum,
OMPD_target_teams_distribute_parallel_for_simd));
OMPTargetTeamsDistributeParallelForSimdDirective *Dir =
new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setIterationVariable(Exprs.IterationVarRef);
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
Dir->setLowerBoundVariable(Exprs.LB);
Dir->setUpperBoundVariable(Exprs.UB);
Dir->setStrideVariable(Exprs.ST);
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
Dir->setNumIterations(Exprs.NumIterations);
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
Dir->setPreInits(Exprs.PreInits);
return Dir;
}
OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty(
const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum,
OMPD_target_teams_distribute_parallel_for_simd));
return new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
CollapsedNum, NumClauses);
}

View File

@@ -1244,6 +1244,12 @@ void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective(
PrintOMPExecutableDirective(Node);
}
void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
OMPTargetTeamsDistributeParallelForSimdDirective *Node) {
Indent() << "#pragma omp target teams distribute parallel for simd ";
PrintOMPExecutableDirective(Node);
}
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//

View File

@@ -763,6 +763,11 @@ void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForDirective(
VisitOMPLoopDirective(S);
}
void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective *S) {
VisitOMPLoopDirective(S);
}
void StmtProfiler::VisitExpr(const Expr *S) {
VisitStmt(S);
}

View File

@@ -1690,15 +1690,19 @@ CFGBuilder::VisitLogicalOperator(BinaryOperator *B,
// we have been provided.
ExitBlock = RHSBlock = createBlock(false);
// Even though KnownVal is only used in the else branch of the next
// conditional, tryEvaluateBool performs additional checking on the
// Expr, so it should be called unconditionally.
TryResult KnownVal = tryEvaluateBool(RHS);
if (!KnownVal.isKnown())
KnownVal = tryEvaluateBool(B);
if (!Term) {
assert(TrueBlock == FalseBlock);
addSuccessor(RHSBlock, TrueBlock);
}
else {
RHSBlock->setTerminator(Term);
TryResult KnownVal = tryEvaluateBool(RHS);
if (!KnownVal.isKnown())
KnownVal = tryEvaluateBool(B);
addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse());
addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue());
}

View File

@@ -685,6 +685,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \
case OMPC_##Name: \
return true;
#include "clang/Basic/OpenMPKinds.def"
default:
break;
}
break;
case OMPD_target_teams_distribute_parallel_for_simd:
switch (CKind) {
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \
case OMPC_##Name: \
return true;
#include "clang/Basic/OpenMPKinds.def"
default:
break;
@@ -721,7 +731,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@@ -735,8 +746,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for;
// TODO add next directives.
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
@@ -752,8 +763,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_target_teams_distribute_parallel_for;
// TODO add next directives.
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
@@ -761,7 +772,8 @@ bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for ||
DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd ||
DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) {
@@ -779,7 +791,8 @@ bool clang::isOpenMPNestingTeamsDirective(OpenMPDirectiveKind DKind) {
bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
return isOpenMPNestingTeamsDirective(DKind) ||
DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
@@ -788,8 +801,8 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_distribute_parallel_for_simd ||
DKind == OMPD_distribute_simd || DKind == OMPD_target_simd ||
DKind == OMPD_teams_distribute_simd ||
DKind == OMPD_teams_distribute_parallel_for_simd;
// TODO add next directives.
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) {
@@ -805,7 +818,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute ||
Kind == OMPD_target_teams_distribute_parallel_for;
Kind == OMPD_target_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
@@ -830,5 +844,6 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute ||
Kind == OMPD_target_teams_distribute_parallel_for;
Kind == OMPD_target_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
}

View File

@@ -20,53 +20,64 @@
using namespace clang;
using namespace CodeGen;
/// \brief Get the GPU warp size.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
namespace {
enum OpenMPRTLFunctionNVPTX {
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
/// kmp_int32 thread_limit);
OMPRTL_NVPTX__kmpc_kernel_init,
};
// NVPTX Address space
enum AddressSpace {
AddressSpaceShared = 3,
};
} // namespace
/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
llvm::None, "nvptx_warp_size");
}
/// \brief Get the id of the current thread on the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
llvm::None, "nvptx_tid");
}
// \brief Get the maximum number of threads in a block of the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
llvm::None, "nvptx_num_threads");
}
/// \brief Get barrier to synchronize all threads in a block.
void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
// \brief Synchronize all GPU threads in a block.
void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
getNVPTXCTABarrier(CGF);
}
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
/// \brief Get the thread id of the OMP master thread.
/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g.: If NumThreads is 33, master id is 32.
/// If NumThreads is 64, master id is 32.
/// If NumThreads is 1024, master id is 992.
llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
Bld.CreateNot(Mask), "master_tid");
}
namespace {
enum OpenMPRTLFunctionNVPTX {
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
/// kmp_int32 thread_limit);
OMPRTL_NVPTX__kmpc_kernel_init,
};
// NVPTX Address space
enum ADDRESS_SPACE {
ADDRESS_SPACE_SHARED = 3,
};
} // namespace
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
CodeGenModule &CGM)
: WorkerFn(nullptr), CGFI(nullptr) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
WorkID = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
}
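The master-thread-id computation above is plain bit arithmetic, master = (NumThreads - 1) & ~(WarpSize - 1); a standalone check against the examples given in the comment:

#include <cassert>

// First lane of the last (possibly partial) warp in the block; WarpSize is
// assumed to be a power of two.
static unsigned masterThreadID(unsigned NumThreads, unsigned WarpSize = 32) {
  return (NumThreads - 1) & ~(WarpSize - 1);
}

int main() {
  assert(masterThreadID(33) == 32);
  assert(masterThreadID(64) == 32);
  assert(masterThreadID(1024) == 992);
  return 0;
}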

View File

@@ -49,38 +49,6 @@ public:
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
private:
//
// NVPTX calls.
//
/// \brief Get the GPU warp size.
llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
/// \brief Get the id of the current thread on the GPU.
llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
// \brief Get the maximum number of threads in a block of the GPU.
llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
/// \brief Get barrier to synchronize all threads in a block.
void getNVPTXCTABarrier(CodeGenFunction &CGF);
// \brief Synchronize all GPU threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);
//
// OMP calls.
//
/// \brief Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
/// If NumThreads is 64, master id is 32.
/// If NumThreads is 1024, master id is 992.
llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
//
// Private state and methods.
//

View File

@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
EmitOMPTargetTeamsDistributeParallelForDirective(
cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
EmitOMPTargetTeamsDistributeParallelForSimdDirective(
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
break;
}
}

View File

@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
});
}
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
*this, OMPD_target_teams_distribute_parallel_for_simd,
[&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
});
}
/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto &RT = CGM.getOpenMPRuntime();
bool HasLastprivateClause = false;
// Check pre-condition.
{
OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
if (EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables and post-update of
// lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
EmitOMPPrivateClause(S, LoopScope);
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
LB.getAddress(), UB.getAddress(), ST.getAddress(),
IL.getAddress(), Chunk);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause)
EmitOMPLastprivateClauseFinal(
S, /*NoFinals=*/false,
Builder.CreateIsNotNull(
EmitLoadOfScalar(IL, S.getLocStart())));
}
// We're now done with the loop, so jump to the continuation block.
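At the source level, the lastprivate handling wired up here copies the value from the sequentially last iteration back to the original variable, guarded by the IsLastIter flag tested above; a small illustration (any OpenMP 4.5-capable compiler assumed):

#include <cstdio>

int main() {
  int last = -1;
  // Each thread works on a private 'last'; after the construct, the value
  // from the logically last iteration (i == 63) is copied back out.
  #pragma omp target teams distribute lastprivate(last)
  for (int i = 0; i < 64; ++i)
    last = i;
  std::printf("%d\n", last); // prints 63
  return 0;
}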

View File

@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
return ResTy;
}
static bool
shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
const ASTContext &Context) {
QualType T = FD->getReturnType();
// Avoid the optimization for functions that return a record type with a
// trivial destructor or another trivially copyable type.
if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
return !ClassDecl->hasTrivialDestructor();
}
return !T.isTriviallyCopyableType(Context);
}
void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// function call is used by the caller, the behavior is undefined.
if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
!FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
bool ShouldEmitUnreachable =
CGM.getCodeGenOpts().StrictReturn ||
shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
if (SanOpts.has(SanitizerKind::Return)) {
SanitizerScope SanScope(this);
llvm::Value *IsFalse = Builder.getFalse();
EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
SanitizerHandler::MissingReturn,
EmitCheckSourceLocation(FD->getLocation()), None);
} else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
EmitTrapCall(llvm::Intrinsic::trap);
} else if (ShouldEmitUnreachable) {
if (CGM.getCodeGenOpts().OptimizationLevel == 0)
EmitTrapCall(llvm::Intrinsic::trap);
}
if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
Builder.CreateUnreachable();
Builder.ClearInsertionPoint();
}
Builder.CreateUnreachable();
Builder.ClearInsertionPoint();
}
// Emit the standard function epilogue.
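The behavior made configurable here: flowing off the end of a non-void function is undefined in C++, and by default (-fstrict-return) clang ends such paths with 'unreachable' (a trap at -O0). A sketch of code affected by the new -fno-strict-return escape hatch:

struct Trivial { int x; }; // trivially copyable, so the heuristic above
                           // skips the unreachable under -fno-strict-return

Trivial f(bool b) {
  if (b)
    return {1};
  // Falling off the end here is UB; under the default -fstrict-return this
  // path becomes 'unreachable', so f(false) returns no meaningful value.
}

int main() { return f(true).x; }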

View File

@@ -2699,6 +2699,8 @@ public:
const OMPTargetTeamsDistributeDirective &S);
void EmitOMPTargetTeamsDistributeParallelForDirective(
const OMPTargetTeamsDistributeParallelForDirective &S);
void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
/// Emit outlined function for the target directive.
static std::pair<llvm::Function * /*OutlinedFn*/,
@@ -3569,7 +3571,7 @@ public:
// If we still have any arguments, emit them using the type of the argument.
for (auto *A : llvm::make_range(Arg, ArgRange.end()))
ArgTypes.push_back(getVarArgType(A));
ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());
EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
}

View File

@@ -2235,6 +2235,15 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-data-sections");
}
if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) {
StringRef FName = A->getValue();
if (!llvm::sys::fs::exists(FName))
D.Diag(diag::err_drv_no_such_file) << FName;
else
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
}
}
/// This is a helper function for validating the optional refinement step
@@ -3058,6 +3067,10 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
continue;
}
if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
Value == "-mbig-obj")
continue; // LLVM handles bigobj automatically
switch (C.getDefaultToolChain().getArch()) {
default:
break;
@@ -4453,6 +4466,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
false))
CmdArgs.push_back("-fstrict-enums");
if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return,
true))
CmdArgs.push_back("-fno-strict-return");
if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
options::OPT_fno_strict_vtable_pointers,
false))

View File

@@ -638,6 +638,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
ChromiumStyle.ContinuationIndentWidth = 8;
ChromiumStyle.IndentWidth = 4;
} else if (Language == FormatStyle::LK_JavaScript) {
ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
ChromiumStyle.AllowShortLoopsOnASingleLine = false;
} else {
ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;

View File

@@ -1255,10 +1255,13 @@ void UnwrappedLineParser::tryToParseJSFunction() {
if (FormatTok->is(tok::l_brace))
tryToParseBracedList();
else
while (FormatTok->isNot(tok::l_brace) && !eof())
while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
nextToken();
}
if (FormatTok->is(tok::semi))
return;
parseChildBlock();
}

View File

@@ -370,6 +370,26 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
break;
}
case Decl::ClassTemplateSpecialization: {
const auto *CTSD = cast<ClassTemplateSpecializationDecl>(DC);
if (CTSD->isCompleteDefinition())
Out << "[class template specialization] ";
else
Out << "<class template specialization> ";
Out << *CTSD;
break;
}
case Decl::ClassTemplatePartialSpecialization: {
const auto *CTPSD = cast<ClassTemplatePartialSpecializationDecl>(DC);
if (CTPSD->isCompleteDefinition())
Out << "[class template partial specialization] ";
else
Out << "<class template partial specialization> ";
Out << *CTPSD;
break;
}
default:
llvm_unreachable("a decl that inherits DeclContext isn't handled");
}
@@ -400,7 +420,8 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
case Decl::CXXConstructor:
case Decl::CXXDestructor:
case Decl::CXXConversion:
{
case Decl::ClassTemplateSpecialization:
case Decl::ClassTemplatePartialSpecialization: {
DeclContext* DC = cast<DeclContext>(I);
PrintDeclContext(DC, Indentation+2);
break;
@@ -478,6 +499,37 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
Out << "<omp threadprivate> " << '"' << I << "\"\n";
break;
}
case Decl::Friend: {
Out << "<friend>";
if (const NamedDecl *ND = cast<FriendDecl>(I)->getFriendDecl())
Out << ' ' << *ND;
Out << "\n";
break;
}
case Decl::Using: {
Out << "<using> " << *cast<UsingDecl>(I) << "\n";
break;
}
case Decl::UsingShadow: {
Out << "<using shadow> " << *cast<UsingShadowDecl>(I) << "\n";
break;
}
case Decl::Empty: {
Out << "<empty>\n";
break;
}
case Decl::AccessSpec: {
Out << "<access specifier>\n";
break;
}
case Decl::VarTemplate: {
Out << "<var template> " << *cast<VarTemplateDecl>(I) << "\n";
break;
}
case Decl::StaticAssert: {
Out << "<static assert>\n";
break;
}
default:
Out << "DeclKind: " << DK << '"' << I << "\"\n";
llvm_unreachable("decl unhandled");

View File

@@ -602,6 +602,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
Opts.SoftFloat = Args.hasArg(OPT_msoft_float);
Opts.StrictEnums = Args.hasArg(OPT_fstrict_enums);
Opts.StrictReturn = !Args.hasArg(OPT_fno_strict_return);
Opts.StrictVTablePointers = Args.hasArg(OPT_fstrict_vtable_pointers);
Opts.UnsafeFPMath = Args.hasArg(OPT_menable_unsafe_fp_math) ||
Args.hasArg(OPT_cl_unsafe_math_optimizations) ||

Some files were not shown because too many files have changed in this diff.