Update llvm to trunk r256945.

Dimitry Andric 2016-01-06 20:19:13 +00:00
commit 4d0b32cd7f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang380-import/; revision=293265
133 changed files with 2992 additions and 1575 deletions

View File

@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//

View File

@ -97,7 +97,7 @@ class MachineInstr
// of memory operands required to be precise exceeds the maximum value of
// NumMemRefs - currently 256 - we remove the operands entirely. Note also
// that this is a non-owning reference to a shared copy on write buffer owned
// by the MachineFunction and created via MF.allocateMemRefsArray.
mmo_iterator MemRefs;
DebugLoc debugLoc; // Source line information.
@ -354,7 +354,7 @@ class MachineInstr
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
/// Return true if we don't have any memory operands which describe the
/// memory access done by this instruction. If this is true, calling code
/// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; }
iterator_range<mmo_iterator> memoperands() {
@ -774,7 +774,7 @@ class MachineInstr
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
bool isMSInlineAsm() const {
return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
}
bool isStackAligningInlineAsm() const;
@ -1180,11 +1180,26 @@ class MachineInstr
/// Assign this MachineInstr's memory reference descriptor list.
/// This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
MemRefs = NewMemRefs;
NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs);
assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
}
/// Assign this MachineInstr's memory reference descriptor list. First
/// element in the pair is the begin iterator/pointer to the array; the
/// second is the number of MemoryOperands. This does not transfer ownership
/// of the underlying memory.
void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
MemRefs = NewMemRefs.first;
NumMemRefs = uint8_t(NewMemRefs.second);
assert(NumMemRefs == NewMemRefs.second &&
"Too many memrefs - must drop memory operands");
}
/// Return a set of memrefs (begin iterator, size) which conservatively
/// describe the memory behavior of both MachineInstrs. This is appropriate
/// for use when merging two MachineInstrs into one. This routine does not
/// modify the memrefs of this MachineInstr.
std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
/// Clear this MachineInstr's memory reference descriptor list. This resets
/// the memrefs to their most conservative state. This should be used only
/// as a last resort since it greatly pessimizes our knowledge of the memory
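The merge helper pairs with the new pair-taking setMemRefs overload. A minimal sketch, assuming two hypothetical instructions being combined into one (e.g. when forming a load pair):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Give a combined instruction memory operands that conservatively cover
// both original accesses. setMemRefs asserts if the merged count no
// longer fits the 8-bit NumMemRefs field.
static void copyMergedMemRefs(MachineInstr &Pair, MachineInstr &A,
                              const MachineInstr &B) {
  Pair.setMemRefs(A.mergeMemRefsWith(B));
}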

View File

@ -162,6 +162,11 @@ class MachineInstrBuilder {
return *this;
}
const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
unsigned> MemOperandsRef) const {
MI->setMemRefs(MemOperandsRef);
return *this;
}
const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
MI->addOperand(*MF, MO);

View File

@ -178,7 +178,7 @@ class MachineOperandIteratorBase {
/// register.
bool FullyDefined;
/// Reg or ont of its aliases is read. The register may only be read
/// Reg or one of its aliases is read. The register may only be read
/// partially.
bool Read;
/// Reg or a super-register is read. The full register is read.

View File

@ -83,7 +83,9 @@ enum class ClrHandlerType { Catch, Finally, Fault, Filter };
struct ClrEHUnwindMapEntry {
MBBOrBasicBlock Handler;
uint32_t TypeToken;
int Parent;
int HandlerParentState; ///< Outer handler enclosing this entry's handler
int TryParentState; ///< Outer try region enclosing this entry's try region,
///< treating later catches on same try as "outer"
ClrHandlerType HandlerType;
};

View File

@ -310,6 +310,11 @@ class CallSiteBase {
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
/// \brief Return true if this function has the given attribute.
bool hasFnAttr(StringRef A) const {
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
/// \brief Return true if the call or the callee has the given attribute.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
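With the new StringRef overload, string function attributes can be queried through a CallSite just like enum attributes. A minimal sketch (the attribute name is illustrative):

#include "llvm/IR/CallSite.h"
using namespace llvm;

// True if the call site or its callee carries the named string attribute.
static bool disablesTailCalls(ImmutableCallSite CS) {
  return CS.hasFnAttr("disable-tail-calls");
}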

View File

@ -61,9 +61,13 @@ class IRBuilderBase {
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
ArrayRef<OperandBundleDef> DefaultOperandBundles;
public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr)
: Context(context), DefaultFPMathTag(FPMathTag), FMF() {
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: Context(context), DefaultFPMathTag(FPMathTag), FMF(),
DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@ -538,37 +542,44 @@ class IRBuilder : public IRBuilderBase, public Inserter {
public:
IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(),
MDNode *FPMathTag = nullptr)
: IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {}
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(C, FPMathTag, OpBundles), Inserter(std::move(I)),
Folder(F) {}
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr)
: IRBuilderBase(C, FPMathTag), Folder() {
}
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(C, FPMathTag, OpBundles), Folder() {}
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB);
}
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB);
}
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr)
: IRBuilderBase(IP->getContext(), FPMathTag), Folder() {
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(IP);
}
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T &F,
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB, IP);
}
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP,
MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB, IP);
}
@ -1529,8 +1540,11 @@ class IRBuilder : public IRBuilderBase, public Inserter {
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
ArrayRef<OperandBundleDef> OpBundles = None,
const Twine &Name = "") {
return Insert(CallInst::Create(Callee, Args, OpBundles), Name);
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
@ -1543,7 +1557,7 @@ class IRBuilder : public IRBuilderBase, public Inserter {
CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
ArrayRef<Value *> Args, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args);
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
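The net effect: a builder can carry default operand bundles that CreateCall attaches to every emitted call. A minimal sketch, assuming OperandBundleDef's (Tag, Inputs) constructor; note that IRBuilderBase stores only an ArrayRef, so the bundle definition must outlive the builder:

#include "llvm/IR/IRBuilder.h"
#include <vector>
using namespace llvm;

// Every call emitted through Builder carries a "deopt" bundle by default.
static CallInst *emitCallWithDeopt(BasicBlock *BB, FunctionType *FTy,
                                   Value *Callee,
                                   const std::vector<Value *> &DeoptState) {
  OperandBundleDef Deopt("deopt", DeoptState);
  IRBuilder<> Builder(BB, nullptr, makeArrayRef(Deopt));
  return Builder.CreateCall(FTy, Callee, None);
}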

View File

@ -3550,6 +3550,11 @@ class InvokeInst : public TerminatorInst,
return hasFnAttrImpl(A);
}
/// \brief Determine whether this call has the given attribute.
bool hasFnAttr(StringRef A) const {
return hasFnAttrImpl(A);
}
/// \brief Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
@ -3734,7 +3739,19 @@ class InvokeInst : public TerminatorInst,
unsigned getNumSuccessorsV() const override;
void setSuccessorV(unsigned idx, BasicBlock *B) override;
bool hasFnAttrImpl(Attribute::AttrKind A) const;
template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the invoke instruction.
if (isFnAttrDisallowedByOpBundle(A))
return false;
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
}
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@ -3966,6 +3983,8 @@ class CatchSwitchInst : public TerminatorInst {
/// point to the added handler.
void addHandler(BasicBlock *Dest);
void removeHandler(handler_iterator HI);
unsigned getNumSuccessors() const { return getNumOperands() - 1; }
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx < getNumSuccessors() &&

View File

@ -32,6 +32,19 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// FLAGS.
let TargetPrefix = "x86" in {
def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
Intrinsic<[], [llvm_i32_ty], []>;
def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
Intrinsic<[], [llvm_i64_ty], []>;
}
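These map onto the GCC-style builtins named above; a minimal C++ sketch of the source-level view (x86 only, 64-bit variant shown):

// Read the flags register, then write the same value back.
static unsigned long long saveRestoreEFlags() {
  unsigned long long Flags = __builtin_ia32_readeflags_u64();
  __builtin_ia32_writeeflags_u64(Flags);
  return Flags;
}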
//===----------------------------------------------------------------------===//
// Read Time Stamp Counter.
let TargetPrefix = "x86" in {
@ -2211,6 +2224,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
@ -2229,6 +2261,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">,
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">,
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Pack ops.
@ -2696,6 +2791,59 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Gather ops
@ -3919,9 +4067,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Support protection key
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
Intrinsic<[], [llvm_i32_ty], []>;
}
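Dropping IntrNoMem models the fact that PKRU reads and writes must stay ordered with surrounding memory accesses rather than being freely moved or deleted. A minimal sketch using the corresponding builtins (requires a PKU-enabled target):

// Save the protection-key rights register, then restore it.
static unsigned savePKRU() {
  unsigned Old = __builtin_ia32_rdpkru();
  __builtin_ia32_wrpkru(Old);
  return Old;
}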
//===----------------------------------------------------------------------===//
// Half float conversion

View File

@ -283,14 +283,20 @@ class ReplaceableMetadataImpl {
LLVMContext &Context;
uint64_t NextIndex;
SmallDenseMap<void *, std::pair<OwnerTy, uint64_t>, 4> UseMap;
/// Flag that can be set to false if this metadata should not be
/// RAUW'ed, e.g. if it is used as the key of a map.
bool CanReplace;
public:
ReplaceableMetadataImpl(LLVMContext &Context)
: Context(Context), NextIndex(0) {}
: Context(Context), NextIndex(0), CanReplace(true) {}
~ReplaceableMetadataImpl() {
assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata");
}
/// Set the CanReplace flag to the given value.
void setCanReplace(bool Replaceable) { CanReplace = Replaceable; }
LLVMContext &getContext() const { return Context; }
/// \brief Replace all uses of this with MD.
@ -901,14 +907,19 @@ class MDNode : public Metadata {
Context.getReplaceableUses()->replaceAllUsesWith(MD);
}
/// Set the CanReplace flag to the given value.
void setCanReplace(bool Replaceable) {
Context.getReplaceableUses()->setCanReplace(Replaceable);
}
/// \brief Resolve cycles.
///
/// Once all forward declarations have been resolved, force cycles to be
/// resolved. If \p MDMaterialized is true, then any temporary metadata
/// resolved. If \p AllowTemps is true, then any temporary metadata
/// is ignored, otherwise it asserts when encountering temporary metadata.
///
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
void resolveCycles(bool MDMaterialized = true);
void resolveCycles(bool AllowTemps = false);
/// \brief Replace a temporary node with a permanent one.
///
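A minimal sketch of the intended use, mirroring the bitcode-reader change later in this commit: pin a temporary node while it serves as a map key, so a stray RAUW asserts instead of silently changing the key's identity:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static TempMDTuple makePinnedTemp(LLVMContext &Ctx) {
  TempMDTuple Temp = MDTuple::getTemporary(Ctx, None);
  // Forbid RAUW while the node is used as a map key; the flag should be
  // set back to true before the node is resolved or replaced.
  Temp->setCanReplace(false);
  return Temp;
}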

View File

@ -22,6 +22,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@ -36,14 +37,13 @@ enum class StatepointFlags {
MaskAll = GCTransition ///< A bitmask that includes all valid flags.
};
class GCRelocateOperands;
class GCRelocateInst;
class ImmutableStatepoint;
bool isStatepoint(const ImmutableCallSite &CS);
bool isStatepoint(const Value *V);
bool isStatepoint(const Value &V);
bool isGCRelocate(const Value *V);
bool isGCRelocate(const ImmutableCallSite &CS);
bool isGCResult(const Value *V);
@ -247,7 +247,7 @@ class StatepointBase {
/// May contain several relocations for the same base/derived pair.
/// For example this could happen due to relocations on unwinding
/// path of invoke.
std::vector<GCRelocateOperands> getRelocates() const;
std::vector<const GCRelocateInst *> getRelocates() const;
/// Get the experimental_gc_result call tied to this statepoint. Can be
/// nullptr if there isn't a gc_result tied to this statepoint. Guaranteed to
@ -305,33 +305,27 @@ class Statepoint
explicit Statepoint(CallSite CS) : Base(CS) {}
};
/// Wraps a call to a gc.relocate and provides access to its operands.
/// TODO: This should likely be refactored to resemble the wrappers in
/// IntrinsicInst.h.
class GCRelocateOperands {
ImmutableCallSite RelocateCS;
/// This represents the gc.relocate intrinsic.
class GCRelocateInst : public IntrinsicInst {
public:
GCRelocateOperands(const User *U) : RelocateCS(U) { assert(isGCRelocate(U)); }
GCRelocateOperands(const Instruction *inst) : RelocateCS(inst) {
assert(isGCRelocate(inst));
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
static inline bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
GCRelocateOperands(CallSite CS) : RelocateCS(CS) { assert(isGCRelocate(CS)); }
/// Return true if this relocate is tied to the invoke statepoint.
/// This includes relocates which are on the unwinding path.
bool isTiedToInvoke() const {
const Value *Token = RelocateCS.getArgument(0);
const Value *Token = getArgOperand(0);
return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token);
}
/// Get enclosed relocate intrinsic
ImmutableCallSite getUnderlyingCallSite() { return RelocateCS; }
/// The statepoint with which this gc.relocate is associated.
const Instruction *getStatepoint() {
const Value *Token = RelocateCS.getArgument(0);
const Instruction *getStatepoint() const {
const Value *Token = getArgOperand(0);
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
@ -354,22 +348,22 @@ class GCRelocateOperands {
/// The index into the associated statepoint's argument list
/// which contains the base pointer of the pointer whose
/// relocation this gc.relocate describes.
unsigned getBasePtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
unsigned getBasePtrIndex() const {
return cast<ConstantInt>(getArgOperand(1))->getZExtValue();
}
/// The index into the associated statepoint's argument list which
/// contains the pointer whose relocation this gc.relocate describes.
unsigned getDerivedPtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
unsigned getDerivedPtrIndex() const {
return cast<ConstantInt>(getArgOperand(2))->getZExtValue();
}
Value *getBasePtr() {
Value *getBasePtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getBasePtrIndex());
}
Value *getDerivedPtr() {
Value *getDerivedPtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getDerivedPtrIndex());
}
@ -377,11 +371,11 @@ class GCRelocateOperands {
template <typename FunTy, typename InstructionTy, typename ValueTy,
typename CallSiteTy>
std::vector<GCRelocateOperands>
std::vector<const GCRelocateInst *>
StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
const {
std::vector<GCRelocateOperands> Result;
std::vector<const GCRelocateInst *> Result;
CallSiteTy StatepointCS = getCallSite();
@ -389,8 +383,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// gc_relocates ensures that we only get pairs which are actually relocated
// and used after the statepoint.
for (const User *U : getInstruction()->users())
if (isGCRelocate(U))
Result.push_back(GCRelocateOperands(U));
if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
Result.push_back(Relocate);
if (!StatepointCS.isInvoke())
return Result;
@ -401,8 +395,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// Search for gc relocates that are attached to this landingpad.
for (const User *LandingPadUser : LandingPad->users()) {
if (isGCRelocate(LandingPadUser))
Result.push_back(GCRelocateOperands(LandingPadUser));
if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
Result.push_back(Relocate);
}
return Result;
}
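Since GCRelocateInst is now a genuine IntrinsicInst subclass, callers can use the ordinary cast machinery instead of isGCRelocate plus a wrapper. A minimal sketch:

#include "llvm/IR/Statepoint.h"
using namespace llvm;

// Count this statepoint's relocates that are tied to an invoke; relocates
// on the unwind path hang off the landingpad and are not visited here.
static unsigned countInvokeTiedRelocates(const Instruction &StatepointInst) {
  unsigned N = 0;
  for (const User *U : StatepointInst.users())
    if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
      if (Relocate->isTiedToInvoke())
        ++N;
  return N;
}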

View File

@ -39,8 +39,8 @@ class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {}
FeatureBitset(std::initializer_list<unsigned> Init) : bitset() {
for (auto I = Init.begin() , E = Init.end(); I != E; ++I)
set(*I);
for (auto I : Init)
set(I);
}
};
@ -59,6 +59,11 @@ struct SubtargetFeatureKV {
bool operator<(StringRef S) const {
return StringRef(Key) < S;
}
// Compare routine for std::is_sorted.
bool operator<(const SubtargetFeatureKV &Other) const {
return StringRef(Key) < StringRef(Other.Key);
}
};
//===----------------------------------------------------------------------===//
@ -98,14 +103,13 @@ class SubtargetFeatures {
/// Adding Features.
void AddFeature(StringRef String, bool Enable = true);
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// ToggleFeature - Toggle a feature and update the feature bits.
static void ToggleFeature(FeatureBitset &Bits, StringRef String,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Apply the feature flag and return the newly updated feature bits.
FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Apply the feature flag and update the feature bits.
static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Get feature bits of a CPU.
FeatureBitset getFeatureBits(StringRef CPU,

View File

@ -155,11 +155,36 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName);
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef FuncName);
/// Return the string initializer of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompress is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result);
/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result);
class InstrProfSymtab;
/// \c NameStrings is a string composed of one or more sub-strings encoded
/// in the format described above. The substrings are separated by 0 or
/// more zero bytes. This method decodes the string and populates the
/// \c Symtab.
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
const std::error_category &instrprof_category();
enum class instrprof_error {
@ -235,6 +260,11 @@ class InstrProfSymtab {
/// This interface is used by the reader of the CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
/// \c NameStrings is a string composed of one or more sub-strings
/// encoded in the format described above. The substrings are
/// separated by 0 or more zero bytes. This method decodes the
/// string and populates the \c Symtab.
inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iterable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
@ -255,8 +285,8 @@ class InstrProfSymtab {
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
/// Return function's PGO name from the function name's symabol
/// address in the object file. If an error occurs, Return
/// Return function's PGO name from the function name's symbol
/// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
@ -270,6 +300,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
return std::error_code();
}
std::error_code InstrProfSymtab::create(StringRef NameStrings) {
if (readPGOFuncNameStrings(NameStrings, *this))
return make_error_code(instrprof_error::malformed);
return std::error_code();
}
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)
@ -576,8 +612,14 @@ template <class IntPtrT> struct CovMapFunctionRecord {
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END
// Per module coverage mapping data header, i.e. CoverageMapFileHeader
// documented above.
struct CovMapHeader {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END
}
} // end namespace llvm
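A minimal round-trip sketch of the encoding and the InstrProfSymtab decoder declared above (compression disabled, error handling reduced to a boolean):

#include "llvm/ProfileData/InstrProf.h"
#include <string>
#include <vector>
using namespace llvm;

static bool roundTripFuncNames(const std::vector<std::string> &Names) {
  std::string Blob;
  // Encode: ULEB128 uncompressed length, ULEB128 compressed length (zero
  // here since compression is off), then the raw name strings.
  if (collectPGOFuncNameStrings(Names, /*doCompression=*/false, Blob))
    return false;
  InstrProfSymtab Symtab;
  return !Symtab.create(StringRef(Blob)); // decoding populates the symtab
}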

View File

@ -1,4 +1,4 @@
/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\
/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
|*
|* The LLVM Compiler Infrastructure
|*
@ -167,6 +167,25 @@ COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
#undef COVMAP_FUNC_RECORD
/* COVMAP_FUNC_RECORD end. */
/* COVMAP_HEADER start */
/* Definition of member fields of coverage map header.
*/
#ifndef COVMAP_HEADER
#define COVMAP_HEADER(Type, LLVMType, Name, Initializer)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \
llvm::ConstantInt::get(Int32Ty, FunctionRecords.size()))
COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \
llvm::ConstantInt::get(Int32Ty, FilenamesSize))
COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \
llvm::ConstantInt::get(Int32Ty, CoverageMappingSize))
COVMAP_HEADER(uint32_t, Int32Ty, Version, \
llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1))
#undef COVMAP_HEADER
/* COVMAP_HEADER end. */
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED

View File

@ -213,6 +213,7 @@ ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC)
ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE)
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE)

View File

@ -130,7 +130,7 @@ struct ProcessInfo {
/// Return true if the given arguments fit within system-specific
/// argument length limits.
bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args);
/// File encoding options when writing contents that a non-UTF8 tool will
/// read (on Windows systems). For UNIX, we always use UTF-8.

View File

@ -305,7 +305,7 @@ class KeyValueNode final : public Node {
/// increment() which must set CurrentEntry to 0 to create an end iterator.
template <class BaseT, class ValueT>
class basic_collection_iterator
: public std::iterator<std::forward_iterator_tag, ValueT> {
: public std::iterator<std::input_iterator_tag, ValueT> {
public:
basic_collection_iterator() : Base(nullptr) {}
basic_collection_iterator(BaseT *B) : Base(B) {}
@ -326,11 +326,24 @@ class basic_collection_iterator
return Base->CurrentEntry;
}
/// Note on EqualityComparable:
///
/// The iterator is not re-entrant; it is meant to be used for parsing
/// YAML on demand. Once iteration has started, it can point to only one
/// entry at a time; hence Base.CurrentEntry and Other.Base.CurrentEntry
/// are equal iff Base and Other.Base are equal.
bool operator==(const basic_collection_iterator &Other) const {
if (Base && (Base == Other.Base)) {
assert((Base->CurrentEntry == Other.Base->CurrentEntry)
&& "Equal Bases expected to point to equal Entries");
}
return Base == Other.Base;
}
bool operator!=(const basic_collection_iterator &Other) const {
if (Base != Other.Base)
return true;
return (Base && Other.Base) &&
Base->CurrentEntry != Other.Base->CurrentEntry;
return !(Base == Other.Base);
}
basic_collection_iterator &operator++() {
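The downgrade to input_iterator_tag matches the actual semantics: a single on-demand pass over the stream, with equality meaningful only as described in the note above. A minimal parsing sketch:

#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
using namespace llvm;

// One forward pass over a top-level YAML sequence; re-iterating the same
// node is not supported (input-iterator semantics).
static unsigned countTopLevelItems(StringRef Input) {
  SourceMgr SM;
  yaml::Stream YS(Input, SM);
  unsigned N = 0;
  if (auto *Seq = dyn_cast_or_null<yaml::SequenceNode>(YS.begin()->getRoot()))
    for (yaml::Node &Item : *Seq) {
      (void)Item;
      ++N;
    }
  return N;
}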

View File

@ -232,7 +232,7 @@ class Init {
/// We could pack these a bit tighter by not having the IK_FirstXXXInit
/// and IK_LastXXXInit be their own values, but that would degrade
/// readability for really no benefit.
enum InitKind {
enum InitKind : uint8_t {
IK_BitInit,
IK_FirstTypedInit,
IK_BitsInit,
@ -256,6 +256,9 @@ class Init {
private:
const InitKind Kind;
protected:
uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit
private:
Init(const Init &) = delete;
Init &operator=(const Init &) = delete;
virtual void anchor();
@ -264,7 +267,7 @@ class Init {
InitKind getKind() const { return Kind; }
protected:
explicit Init(InitKind K) : Kind(K) {}
explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
public:
virtual ~Init() {}
@ -365,7 +368,8 @@ class TypedInit : public Init {
TypedInit &operator=(const TypedInit &Other) = delete;
protected:
explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
: Init(K, Opc), Ty(T) {}
~TypedInit() override {
// If this is a DefInit we need to delete the RecordRecTy.
if (getKind() == IK_DefInit)
@ -650,7 +654,8 @@ class OpInit : public TypedInit {
OpInit &operator=(OpInit &Other) = delete;
protected:
explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {}
explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc)
: TypedInit(K, Type, Opc) {}
public:
static bool classof(const Init *I) {
@ -677,14 +682,13 @@ class OpInit : public TypedInit {
///
class UnOpInit : public OpInit {
public:
enum UnaryOp { CAST, HEAD, TAIL, EMPTY };
enum UnaryOp : uint8_t { CAST, HEAD, TAIL, EMPTY };
private:
UnaryOp Opc;
Init *LHS;
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
: OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {}
: OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {}
UnOpInit(const UnOpInit &Other) = delete;
UnOpInit &operator=(const UnOpInit &Other) = delete;
@ -708,7 +712,7 @@ class UnOpInit : public OpInit {
return getOperand();
}
UnaryOp getOpcode() const { return Opc; }
UnaryOp getOpcode() const { return (UnaryOp)Opc; }
Init *getOperand() const { return LHS; }
// Fold - If possible, fold this to a simpler init. Return this if not
@ -724,14 +728,14 @@ class UnOpInit : public OpInit {
///
class BinOpInit : public OpInit {
public:
enum BinaryOp { ADD, AND, SHL, SRA, SRL, LISTCONCAT, STRCONCAT, CONCAT, EQ };
enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT,
STRCONCAT, CONCAT, EQ };
private:
BinaryOp Opc;
Init *LHS, *RHS;
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {}
OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {}
BinOpInit(const BinOpInit &Other) = delete;
BinOpInit &operator=(const BinOpInit &Other) = delete;
@ -759,7 +763,7 @@ class BinOpInit : public OpInit {
}
}
BinaryOp getOpcode() const { return Opc; }
BinaryOp getOpcode() const { return (BinaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getRHS() const { return RHS; }
@ -776,15 +780,14 @@ class BinOpInit : public OpInit {
///
class TernOpInit : public OpInit {
public:
enum TernaryOp { SUBST, FOREACH, IF };
enum TernaryOp : uint8_t { SUBST, FOREACH, IF };
private:
TernaryOp Opc;
Init *LHS, *MHS, *RHS;
TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
RecTy *Type) :
OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
TernOpInit(const TernOpInit &Other) = delete;
TernOpInit &operator=(const TernOpInit &Other) = delete;
@ -815,7 +818,7 @@ class TernOpInit : public OpInit {
}
}
TernaryOp getOpcode() const { return Opc; }
TernaryOp getOpcode() const { return (TernaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getMHS() const { return MHS; }
Init *getRHS() const { return RHS; }

View File

@ -936,6 +936,10 @@ class AsmParser {
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
// written register name matcher
bit ShouldEmitMatchRegisterName = 1;
// HasMnemonicFirst - Set to false if target instructions don't always
// start with a mnemonic as the first token.
bit HasMnemonicFirst = 1;
}
def DefaultAsmParser : AsmParser;

View File

@ -2269,6 +2269,12 @@ class TargetLowering : public TargetLoweringBase {
return false;
}
/// Return true if the MachineFunction contains a COPY which would imply
/// HasOpaqueSPAdjustment.
virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const {
return false;
}
/// Perform necessary initialization to handle a subset of CSRs explicitly
/// via copies. This function is called at the beginning of instruction
/// selection.

View File

@ -23,11 +23,13 @@
namespace llvm {
/// This optimization identifies DIV instructions that can be
/// This optimization identifies DIV instructions in a BB that can be
/// profitably bypassed and carried out with a shorter, faster divide.
bool bypassSlowDivision(Function &F,
Function::iterator &I,
const DenseMap<unsigned int, unsigned int> &BypassWidth);
///
/// This optimization may add basic blocks immediately after BB; for obvious
/// reasons, you shouldn't pass those blocks to bypassSlowDivision.
bool bypassSlowDivision(
BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
} // End llvm namespace

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@ -39,6 +40,8 @@ struct LICMSafetyInfo {
bool MayThrow; // The current loop contains an instruction which
// may throw.
bool HeaderMayThrow; // Same as previous, but specific to loop header
// Used to update funclet bundle operands.
DenseMap<BasicBlock *, ColorVector> BlockColors;
LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
{}
};

View File

@ -543,7 +543,6 @@ static bool isMemsetPattern16(const Function *MS,
isa<IntegerType>(MemsetType->getParamType(2)))
return true;
}
return false;
}
@ -583,9 +582,6 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
if (isMemsetPattern16(F, TLI))
Min = FMRB_OnlyAccessesArgumentPointees;
// Otherwise be conservative.
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
@ -599,22 +595,21 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
return ArgIdx ? MRI_Ref : MRI_Mod;
// We don't currently have a writeonly attribute. All other properties
// of these intrinsics are nicely described via attributes in
// Intrinsics.td and handled generically below.
if (ArgIdx == 0)
return MRI_Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
if (CS.getCalledFunction() &&
isMemsetPattern16(CS.getCalledFunction(), TLI)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
return ArgIdx ? MRI_Ref : MRI_Mod;
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
// whenever possible. Note that all but the missing writeonly attribute are
// handled via InferFunctionAttr.
if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
if (ArgIdx == 0)
return MRI_Mod;
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
return MRI_Ref;

View File

@ -376,15 +376,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else {
return true; // Argument of an unknown call.
}
// If the Callee is not ReadNone, it may read the global,
// and if it is not ReadOnly, it may also write to it.
Function *CalleeF = CS.getCalledFunction();
if (!CalleeF->doesNotAccessMemory()) {
if (Readers)
Readers->insert(CalleeF);
if (Writers && !CalleeF->onlyReadsMemory())
Writers->insert(CalleeF);
}
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
@ -516,7 +507,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (F->isDeclaration()) {
// Try to get mod/ref behaviour from function attributes.
if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
if (F->doesNotAccessMemory()) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(MRI_Ref);
@ -524,12 +515,6 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
} else if (F->onlyAccessesArgMemory() ||
F->onlyAccessesInaccessibleMemOrArgMem()) {
// This function may only access (read/write) memory pointed to by its
// arguments. If this pointer is to a global, this escaping use of the
// pointer is captured in AnalyzeUsesOfPointer().
FI.addModRefInfo(MRI_ModRef);
} else {
FI.addModRefInfo(MRI_ModRef);
// Can't say anything useful unless it's an intrinsic - they don't

View File

@ -187,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
}
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
}
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.

View File

@ -477,7 +477,7 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
// being 42. A key property of this program however is that if either
// 1 or 4 were missing, there would be a race between the store of 42
// either the store of 0 or the load (making the whole progam racy).
// The paper mentionned above shows that the same property is respected
// The paper mentioned above shows that the same property is respected
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
@ -685,13 +685,13 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
return MemDepResult::getDef(Inst);
if (isInvariantLoad)
continue;
// Be conservative if the accessed pointer may alias the allocation.
if (AA->alias(Inst, AccessPtr) != NoAlias)
return MemDepResult::getClobber(Inst);
// If the allocation is not aliased and does not read memory (like
// strdup), it is safe to ignore.
if (isa<AllocaInst>(Inst) ||
isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if ((AA->alias(Inst, AccessPtr) == NoAlias) &&
// If the allocation is not aliased and does not read memory (like
// strdup), it is safe to ignore.
(isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) ||
isCallocLikeFn(Inst, TLI)))
continue;
}
@ -792,10 +792,8 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
int Count = -1) {
if (Count == -1) Count = Cache.size();
if (Count == 0) return;
for (unsigned i = 1; i != unsigned(Count); ++i)
assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
"Cache isn't sorted!");
}
#endif

View File

@ -52,14 +52,13 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
const char *const *StandardNames) {
#ifndef NDEBUG
ArrayRef<const char *> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
}
#endif // !NDEBUG
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
[](const char *LHS, const char *RHS) {
return strcmp(LHS, RHS) < 0;
}) &&
"TargetLibraryInfoImpl function names must be sorted");
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {

View File

@ -1743,9 +1743,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
return false;
Value *X = nullptr, *Y = nullptr;
// A shift of a power of two is a power of two or zero.
// A shift left or a logical shift right of a power of two is a power of two
// or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
@ -2829,7 +2830,12 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout &DL) {
unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
while (1) {
// We walk up the defs but use a visited set to handle unreachable code. In
// that case, we stop after accumulating the cycle once (not that it
// matters).
SmallPtrSet<Value *, 16> Visited;
while (Visited.insert(Ptr).second) {
if (Ptr->getType()->isVectorTy())
break;
@ -3268,12 +3274,9 @@ static bool isDereferenceableAndAlignedPointer(
}
// For gc.relocate, look through relocations
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
GCRelocateOperands RelocateInst(I);
return isDereferenceableAndAlignedPointer(
RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
}
if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
return isDereferenceableAndAlignedPointer(
RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
@ -3474,10 +3477,6 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (CS.isReturnNonNull())
return true;
// operator new never returns null.
if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
return true;
return false;
}

View File

@ -3071,7 +3071,12 @@ void BitcodeReader::saveMetadataList(
for (unsigned ID = 0; ID < MetadataList.size(); ++ID) {
Metadata *MD = MetadataList[ID];
auto *N = dyn_cast_or_null<MDNode>(MD);
assert((!N || (N->isResolved() || N->isTemporary())) &&
"Found non-resolved non-temp MDNode while saving metadata");
// Save all values if !OnlyTempMD, otherwise just the temporary metadata.
// Note that in the !OnlyTempMD case we need to save all Metadata, not
// just MDNode, as we may have references to other types of module-level
// metadata (e.g. ValueAsMetadata) from instructions.
if (!OnlyTempMD || (N && N->isTemporary())) {
// Will call this after materializing each function, in order to
// handle remapping of the function's instructions/metadata.
@ -3080,6 +3085,11 @@ void BitcodeReader::saveMetadataList(
assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id");
continue;
}
if (N && N->isTemporary())
// Ensure that we assert if someone tries to RAUW this temporary
// metadata while it is the key of a map. The flag will be set back
// to true when the saved metadata list is destroyed.
N->setCanReplace(false);
MetadataToIDs[MD] = ID;
}
}

View File

@ -976,32 +976,32 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
}
}
static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
int Rank = 0;
while (State != -1) {
++Rank;
State = FuncInfo.ClrEHUnwindMap[State].Parent;
State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
}
return Rank;
}
static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
int LeftRank = getRank(FuncInfo, Left);
int RightRank = getRank(FuncInfo, Right);
static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
int LeftRank = getTryRank(FuncInfo, Left);
int RightRank = getTryRank(FuncInfo, Right);
while (LeftRank < RightRank) {
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
--RightRank;
}
while (RightRank < LeftRank) {
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
--LeftRank;
}
while (Left != Right) {
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
}
return Left;
@ -1035,9 +1035,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
HandlerStates[HandlerBlock] = State;
// Use this loop through all handlers to verify our assumption (used in
// the MinEnclosingState computation) that ancestors have lower state
// numbers than their descendants.
assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
// the MinEnclosingState computation) that enclosing funclets have lower
// state numbers than their enclosed funclets.
assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
"ill-formed state numbering");
}
// Map the main function to the NullState.
@ -1070,7 +1070,6 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
// Visit the root function and each funclet.
for (MachineFunction::const_iterator FuncletStart = MF->begin(),
FuncletEnd = MF->begin(),
End = MF->end();
@ -1100,17 +1099,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
for (const auto &StateChange :
InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
// Close any try regions we're not still under
int AncestorState =
getAncestor(FuncInfo, CurrentState, StateChange.NewState);
while (CurrentState != AncestorState) {
assert(CurrentState != NullState && "Failed to find ancestor!");
int StillPendingState =
getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
while (CurrentState != StillPendingState) {
assert(CurrentState != NullState &&
"Failed to find still-pending state!");
// Close the pending clause
Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
CurrentState, FuncletState});
// Now the parent handler is current
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
// Now the next-outer try region is current
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
// Pop the new start label from the handler stack if we've exited all
// descendants of the corresponding handler.
// inner try regions of the corresponding try region.
if (HandlerStack.back().second == CurrentState)
CurrentStartLabel = HandlerStack.pop_back_val().first;
}
@ -1121,7 +1121,8 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
// it.
for (int EnteredState = StateChange.NewState;
EnteredState != CurrentState;
EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
EnteredState =
FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
int &MinEnclosingState = MinClauseMap[EnteredState];
if (FuncletState < MinEnclosingState)
MinEnclosingState = FuncletState;

View File

@ -225,8 +225,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
BasicBlock* BB = &*F.begin();
while (BB != nullptr) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
// Eliminate blocks that contain only PHI nodes and an
@ -526,19 +532,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
const SmallVectorImpl<User *> &AllRelocateCalls,
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
RelocateInstMap) {
const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
&RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
for (auto &U : AllRelocateCalls) {
GCRelocateOperands ThisRelocate(U);
IntrinsicInst *I = cast<IntrinsicInst>(U);
auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
ThisRelocate.getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, I));
DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
for (auto *ThisRelocate : AllRelocateCalls) {
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
ThisRelocate->getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
}
for (auto &Item : RelocateIdxMap) {
std::pair<unsigned, unsigned> Key = Item.first;
@ -546,7 +550,7 @@ static void computeBaseDerivedRelocateMap(
// Base relocation: nothing to insert
continue;
IntrinsicInst *I = Item.second;
GCRelocateInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
// We're iterating over RelocateIdxMap so we cannot modify it.
@ -579,16 +583,13 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and applies it.
static bool
simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
const SmallVectorImpl<IntrinsicInst *> &Targets) {
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
const SmallVectorImpl<GCRelocateInst *> &Targets) {
bool MadeChange = false;
for (auto &ToReplace : Targets) {
GCRelocateOperands MasterRelocate(RelocatedBase);
GCRelocateOperands ThisRelocate(ToReplace);
assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
for (GCRelocateInst *ToReplace : Targets) {
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
@ -601,8 +602,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
continue;
}
Value *Base = ThisRelocate.getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
Value *Base = ToReplace->getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
@ -680,12 +681,12 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
bool MadeChange = false;
SmallVector<User *, 2> AllRelocateCalls;
SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
for (auto *U : I.users())
if (isGCRelocate(dyn_cast<Instruction>(U)))
if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
// Collect all the relocate calls associated with a statepoint
AllRelocateCalls.push_back(U);
AllRelocateCalls.push_back(Relocate);
// We need at least one base pointer relocation + one derived pointer
// relocation to mangle
@ -694,7 +695,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
// RelocateInstMap is a mapping from the base relocate instruction to the
// corresponding derived relocate instructions
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
if (RelocateInstMap.empty())
return false;

View File

@ -122,8 +122,7 @@ INITIALIZE_PASS_END(MachineCSE, "machine-cse",
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@ -186,8 +185,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return true;
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
for (const MachineOperand &MO : I->operands()) {
if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
SeenDef = true;
if (!MO.isReg() || !MO.getReg())
@ -220,8 +218,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
SmallVectorImpl<unsigned> &PhysDefs,
bool &PhysUseDef) const{
// First, add all uses to PhysRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
@ -239,8 +236,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false;
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@ -311,8 +307,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
if (I == E)
return true;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
for (const MachineOperand &MO : I->operands()) {
// RegMasks go on instructions like calls that clobber lots of physregs.
// Don't attempt to CSE across such an instruction.
if (MO.isRegMask())
@ -398,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #2: If the expression doesn't use a vr and the only uses
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
@ -580,9 +574,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
unsigned OldReg = CSEPairs[i].first;
unsigned NewReg = CSEPairs[i].second;
for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
unsigned OldReg = CSEPair.first;
unsigned NewReg = CSEPair.second;
// OldReg may have been unused but is used now, clear the Dead flag
MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
assert(Def != nullptr && "CSEd register has no unique definition?");
@ -594,8 +588,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
// we should make sure it is not dead at CSMI.
for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
@ -685,18 +679,14 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
unsigned NumChildren = Children.size();
OpenChildren[Node] = NumChildren;
for (unsigned i = 0; i != NumChildren; ++i) {
MachineDomTreeNode *Child = Children[i];
OpenChildren[Node] = Children.size();
for (MachineDomTreeNode *Child : Children)
WorkList.push_back(Child);
}
} while (!WorkList.empty());
// Now perform CSE.
bool Changed = false;
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
MachineDomTreeNode *Node = Scopes[i];
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
Changed |= ProcessBlock(MBB);

View File

@ -866,6 +866,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
std::pair<MachineInstr::mmo_iterator, unsigned>
MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
// TODO: If we end up with too many memory operands, return the empty
// conservative set rather than failing asserts.
// TODO: consider uniquing elements within the operand lists to reduce
// space usage and fall back to conservative information less often.
size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
+ (Other.memoperands_end() - Other.memoperands_begin());
MachineFunction *MF = getParent()->getParent();
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
MemBegin);
MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
MemEnd);
assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
"missing memrefs");
return std::make_pair(MemBegin, CombinedNumMemRefs);
}
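A minimal usage sketch of the new helper (illustrative, not from this patch; Wide, NarrowA, and NarrowB are hypothetical):

  // Give a combined instruction the union of both source memop lists.
  auto Merged = NarrowA->mergeMemRefsWith(*NarrowB);
  Wide->setMemRefs(Merged.first, Merged.first + Merged.second);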
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
@ -1738,7 +1759,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool HaveSemi = false;
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
OS << " flags: ";
if (Flags & FrameSetup)
@ -1749,7 +1773,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (!memoperands_empty()) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
OS << " mem:";
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
@ -1762,7 +1789,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print the regclass of any virtual registers encountered.
if (MRI && !VirtRegs.empty()) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
OS << " " << TRI->getRegClassName(RC)
@ -1781,7 +1811,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print debug location information.
if (isDebugValue() && getOperand(e - 2).isMetadata()) {
if (!HaveSemi) OS << ";";
if (!HaveSemi)
OS << ";";
auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
OS << " line no:" << DV->getLine();
if (auto *InlinedAt = debugLoc->getInlinedAt()) {
@ -1795,7 +1826,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (isIndirectDebugValue())
OS << " indirect";
} else if (debugLoc && MF) {
if (!HaveSemi) OS << ";";
if (!HaveSemi)
OS << ";";
OS << " dbg:";
debugLoc.print(OS);
}

View File

@ -315,7 +315,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!TRI->regsOverlap(MOReg, Reg))
continue;
bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
if (MO.readsReg()) {
PRI.Read = true;
if (Covered) {

View File

@ -97,9 +97,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
}
P.MaxSetPressure[*PSetI] =
std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
}
}
}

View File

@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, DL, PtrType));
DAG.getConstant(PtrOff, DL, PtrType),
&Flags);
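// Worked example (illustrative, not from this patch): narrowing an i64 load
// to the i32 half at byte offset 4 computes NewPtr = Base + 4. The original
// load already accessed bytes Base through Base + 7 without wrapping, so
// Base + 4 cannot wrap either; that is what the NUW flag records.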
AddToWorklist(NewPtr.getNode());
SDValue Load;

View File

@ -2843,6 +2843,43 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
return (AZero | BZero).isAllOnesValue();
}
static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
llvm::SelectionDAG &DAG) {
if (Ops.size() == 1)
return Ops[0];
// Concat of UNDEFs is UNDEF.
if (std::all_of(Ops.begin(), Ops.end(),
[](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified
// to one big BUILD_VECTOR.
// FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well.
if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) {
return Op.getOpcode() == ISD::BUILD_VECTOR;
}))
return SDValue();
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
for (SDValue Op : Ops)
Elts.append(Op->op_begin(), Op->op_end());
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
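A hedged illustration of what the helper above folds (values and types invented for the example):

  // t1: v2i16 = BUILD_VECTOR a, b      t2: v2i16 = BUILD_VECTOR c, d
  // concat_vectors t1, t2  ->  v4i16 = BUILD_VECTOR a, b, c, d
  // If the scalar types differ, each element is first zero- or sign-extended
  // to the widest scalar type, as the loop above does.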
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
@ -3426,34 +3463,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS:
// Concat of UNDEFs is UNDEF.
if (N1.getOpcode() == ISD::UNDEF &&
N2.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = TLI->isZExtFree(Op.getValueType(), SVT)
? getZExtOrTrunc(Op, DL, SVT)
: getSExtOrTrunc(Op, DL, SVT);
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::AND:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
@ -3911,19 +3927,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
break;
}
case ISD::CONCAT_VECTORS:
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
@ -5462,6 +5472,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
switch (Opcode) {
default: break;
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SELECT_CC: {
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&

View File

@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
RetPtr.getValueType(), RetPtr,
DAG.getIntPtrConstant(Offsets[i],
getCurSDLoc()));
getCurSDLoc()),
&Flags);
Chains[i] =
DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()));
DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
}
Ty = StTy->getElementType(Field);
@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue OffsVal = VectorWidth ?
DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
continue;
}
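A worked sanity check for the two inbounds cases above (illustrative, not from this patch): an inbounds GEP stays within its underlying object, and an allocated object cannot straddle the end of the address space. So for a base pointer inside such an object and a constant offset Offs with Offs >= 0 even when read as signed, the addition cannot wrap modulo 2^64, which is exactly the guarantee the NUW flag encodes.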
@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Align = 0;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size.
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
DAG.getIntPtrConstant(StackAlign - 1, dl));
DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl,
@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT));
DAG.getConstant(Offsets[i], dl, PtrVT),
&Flags);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
isNonTemporal, isInvariant, Alignment, AAInfo,
@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
ChainI = 0;
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT));
DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
SDValue St = DAG.getStore(Root, dl,
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
@ -5189,7 +5223,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::experimental_gc_relocate: {
visitGCRelocate(I);
visitGCRelocate(cast<GCRelocateInst>(I));
return nullptr;
}
case Intrinsic::instrprof_increment:
@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT));
PtrVT), &Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),

View File

@ -855,7 +855,7 @@ class SelectionDAGBuilder {
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
void visitGCRelocate(const CallInst &I);
void visitGCRelocate(const GCRelocateInst &I);
void visitGCResult(const CallInst &I);
void visitUserOp1(const Instruction &I) {

View File

@ -633,6 +633,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
if (TLI->hasCopyImplyingStackAdjustment(MF))
MFI->setHasOpaqueSPAdjustment(true);
// Freeze the set of reserved registers now that MachineFrameInfo has been
// set up. All the information required by getReservedRegs() should be
// available now.

View File

@ -128,13 +128,11 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return Optional<int>();
// Spill location is known for gc relocates
if (isGCRelocate(Val)) {
GCRelocateOperands RelocOps(cast<Instruction>(Val));
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()];
auto It = SpillMap.find(RelocOps.getDerivedPtr());
auto It = SpillMap.find(Relocate->getDerivedPtr());
if (It == SpillMap.end())
return Optional<int>();
@ -401,10 +399,10 @@ static void getIncomingStatepointGCValues(
SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
SelectionDAGBuilder &Builder) {
for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) {
Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
Bases.push_back(relocateOpers.getBasePtr());
Ptrs.push_back(relocateOpers.getDerivedPtr());
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
Relocs.push_back(Relocate);
Bases.push_back(Relocate->getBasePtr());
Ptrs.push_back(Relocate->getDerivedPtr());
}
// Remove any redundant llvm::Values which map to the same SDValue as another
@ -602,8 +600,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) {
const Value *V = RelocateOpers.getDerivedPtr();
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
const Value *V = Relocate->getDerivedPtr();
SDValue SDV = Builder.getValue(V);
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
@ -624,8 +622,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values does not use original
// value.
if (RelocateOpers.getUnderlyingCallSite().getParent() !=
StatepointInstr->getParent())
if (Relocate->getParent() != StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@ -656,7 +653,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// statepoint.
for (const User *U : CS->users()) {
const CallInst *Call = cast<CallInst>(U);
if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent())
StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@ -859,24 +856,22 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
}
}
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
GCRelocateOperands RelocateOpers(&CI);
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#ifndef NDEBUG
// Consistency check
// We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
StatepointLowering.relocCallVisited(CI);
if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) {
StatepointLowering.relocCallVisited(Relocate);
}
#endif
const Value *DerivedPtr = RelocateOpers.getDerivedPtr();
const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()];
FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()];
// We should have recorded a location for this pointer
assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
@ -885,7 +880,7 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
if (!DerivedPtrLocation) {
setValue(&CI, SD);
setValue(&Relocate, SD);
return;
}
@ -907,5 +902,5 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
DAG.setRoot(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
setValue(&CI, SpillLoad);
setValue(&Relocate, SpillLoad);
}

View File

@ -212,7 +212,7 @@ unsigned TargetSchedModel::computeOperandLatency(
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
&& SchedModel.isComplete()) {
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
<< *DefMI;
<< *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
llvm_unreachable("incomplete machine model");
}
#endif

View File

@ -17,11 +17,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@ -435,11 +438,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
ClrHandlerType HandlerType, uint32_t TypeToken,
const BasicBlock *Handler) {
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
int TryParentState, ClrHandlerType HandlerType,
uint32_t TypeToken, const BasicBlock *Handler) {
ClrEHUnwindMapEntry Entry;
Entry.Parent = ParentState;
Entry.HandlerParentState = HandlerParentState;
Entry.TryParentState = TryParentState;
Entry.Handler = Handler;
Entry.HandlerType = HandlerType;
Entry.TypeToken = TypeToken;
@ -453,82 +457,199 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
if (!FuncInfo.EHPadStateMap.empty())
return;
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
// This numbering assigns one state number to each catchpad and cleanuppad.
// It also computes two tree-like relations over states:
// 1) Each state has a "HandlerParentState", which is the state of the next
// outer handler enclosing this state's handler (same as nearest ancestor
// per the ParentPad linkage on EH pads, but skipping over catchswitches).
// 2) Each state has a "TryParentState", which:
// a) for a catchpad that's not the last handler on its catchswitch, is
// the state of the next catchpad on that catchswitch
// b) for all other pads, is the state of the pad whose try region is the
// next outer try region enclosing this state's try region. The "try
// regions" are not present as such in the IR, but will be inferred
// based on the placement of invokes and pads which reach each other
// by exceptional exits.
// Catchswitches do not get their own states, but each gets mapped to the
// state of its first catchpad.
// Each pad needs to be able to refer to its parent, so scan the function
// looking for top-level handlers and seed the worklist with them.
// Step one: walk down from outermost to innermost funclets, assigning each
// catchpad and cleanuppad a state number. Add an entry to the
// ClrEHUnwindMap for each state, recording its HandlerParentState and
// handler attributes. Record the TryParentState as well for each catchpad
// that's not the last on its catchswitch, but initialize all other entries'
// TryParentStates to a sentinel -1 value that the next pass will update.
// Seed a worklist with pads that have no parent.
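// Hedged illustration (not from this patch): for a catchswitch with handlers
// [C1, C2] whose parent pad is a cleanuppad CL, this numbering yields
//   HandlerParentState(C1) == HandlerParentState(C2) == state(CL)
//   TryParentState(C1)     == state(C2)   (the next catch on the switch)
//   TryParentState(C2)     == -1 for now; step two fills it in from the
//                             catchswitch's unwind destination.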
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
for (const BasicBlock &BB : *Fn) {
if (!BB.isEHPad())
continue;
if (BB.isLandingPad())
report_fatal_error("CoreCLR EH cannot use landingpads");
const Instruction *FirstNonPHI = BB.getFirstNonPHI();
if (!isTopLevelPadForMSVC(FirstNonPHI))
const Value *ParentPad;
if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
ParentPad = CPI->getParentPad();
else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
ParentPad = CSI->getParentPad();
else
continue;
// queue this with sentinel parent state -1 to mean unwind to caller.
Worklist.emplace_back(FirstNonPHI, -1);
if (isa<ConstantTokenNone>(ParentPad))
Worklist.emplace_back(FirstNonPHI, -1);
}
// Use the worklist to visit all pads, from outer to inner. Record
// HandlerParentState for all pads. Record TryParentState only for catchpads
// that aren't the last on their catchswitch (setting all other entries'
// TryParentStates to an initial value of -1). This loop is also responsible
// for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
// catchswitches.
while (!Worklist.empty()) {
const Instruction *Pad;
int ParentState;
std::tie(Pad, ParentState) = Worklist.pop_back_val();
int HandlerParentState;
std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
Value *ParentPad;
int PredState;
if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// A cleanup can have multiple exits; don't re-process after the first.
if (FuncInfo.EHPadStateMap.count(Cleanup))
continue;
// CoreCLR personality uses arity to distinguish faults from finallies.
const BasicBlock *PadBlock = Cleanup->getParent();
if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// Create the entry for this cleanup with the appropriate handler
// properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
(Cleanup->getNumOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
int NewState =
addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
FuncInfo.EHPadStateMap[Cleanup] = NewState;
// Propagate the new state to all preds of the cleanup
ParentPad = Cleanup->getParentPad();
PredState = NewState;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
SmallVector<const CatchPadInst *, 1> Handlers;
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
Handlers.push_back(Catch);
}
FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
int NewState = ParentState;
for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
HandlerI != HandlerE; ++HandlerI) {
const CatchPadInst *Catch = *HandlerI;
const BasicBlock *PadBlock = Catch->getParent();
(Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
HandlerType, 0, Pad->getParent());
// Queue any child EH pads on the worklist.
for (const User *U : Cleanup->users())
if (const auto *I = dyn_cast<Instruction>(U))
if (I->isEHPad())
Worklist.emplace_back(I, CleanupState);
// Remember this pad's state.
FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
} else {
// Walk the handlers of this catchswitch in reverse order since all but
// the last need to set the following one as its TryParentState.
const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
int CatchState = -1, FollowerState = -1;
SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
CBI != CBE; ++CBI, FollowerState = CatchState) {
const BasicBlock *CatchBlock = *CBI;
// Create the entry for this catch with the appropriate handler
// properties.
const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
uint32_t TypeToken = static_cast<uint32_t>(
cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
TypeToken, PadBlock);
FuncInfo.EHPadStateMap[Catch] = NewState;
CatchState =
addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
ClrHandlerType::Catch, TypeToken, CatchBlock);
// Queue any child EH pads on the worklist.
for (const User *U : Catch->users())
if (const auto *I = dyn_cast<Instruction>(U))
if (I->isEHPad())
Worklist.emplace_back(I, CatchState);
// Remember this catch's state.
FuncInfo.EHPadStateMap[Catch] = CatchState;
}
for (const auto *CatchPad : Handlers) {
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
if (UserI->isEHPad())
Worklist.emplace_back(UserI, ParentState);
}
}
PredState = NewState;
ParentPad = CatchSwitch->getParentPad();
} else {
llvm_unreachable("Unexpected EH pad");
}
// Queue all predecessors with the given state
for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
// Associate the catchswitch with the state of its first catch.
assert(CatchSwitch->getNumHandlers());
FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
}
}
// Step two: record the TryParentState of each state. For cleanuppads that
// don't have cleanuprets, we may need to infer this from their child pads,
// so visit pads in descendant-most to ancestor-most order.
for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
End = FuncInfo.ClrEHUnwindMap.rend();
Entry != End; ++Entry) {
const Instruction *Pad =
Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
// If a catch is not the last in its catchswitch, its TryParentState is
// the state associated with the next catch in the switch, even though
// that's not the unwind dest of exceptions escaping the catch. Those
// cases were already assigned a TryParentState in the first pass, so
// skip them.
if (Entry->TryParentState != -1)
continue;
// Otherwise, get the unwind dest from the catchswitch.
UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
} else {
const auto *Cleanup = cast<CleanupPadInst>(Pad);
UnwindDest = nullptr;
for (const User *U : Cleanup->users()) {
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
// Common and unambiguous case -- cleanupret indicates cleanup's
// unwind dest.
UnwindDest = CleanupRet->getUnwindDest();
break;
}
// Get an unwind dest for the user
const BasicBlock *UserUnwindDest = nullptr;
if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
UserUnwindDest = Invoke->getUnwindDest();
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
UserUnwindDest = CatchSwitch->getUnwindDest();
} else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
int UserUnwindState =
FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
if (UserUnwindState != -1)
UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
.Handler.get<const BasicBlock *>();
}
// Not having an unwind dest for this user might indicate that it
// doesn't unwind, so can't be taken as proof that the cleanup itself
// may unwind to caller (see e.g. SimplifyUnreachable and
// RemoveUnwindEdge).
if (!UserUnwindDest)
continue;
// Now we have an unwind dest for the user, but we need to see if it
// unwinds all the way out of the cleanup or if it stays within it.
const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
const Value *UserUnwindParent;
if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
UserUnwindParent = CSI->getParentPad();
else
UserUnwindParent =
cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
// The unwind stays within the cleanup iff it targets a child of the
// cleanup.
if (UserUnwindParent == Cleanup)
continue;
// This unwind exits the cleanup, so its dest is the cleanup's dest.
UnwindDest = UserUnwindDest;
break;
}
}
// Record the state of the unwind dest as the TryParentState.
int UnwindDestState;
// If UnwindDest is null at this point, either the pad in question can
// be exited by unwind to caller, or it cannot be exited by unwind. In
// either case, reporting such cases as unwinding to caller is correct.
// This can lead to EH tables that "look strange" -- if this pad is in
// a parent funclet which has other children that do unwind to an enclosing
// pad, the try region for this pad will be missing the "duplicate" EH
// clause entries that you'd expect to see covering the whole parent. That
// should be benign, since the unwind never actually happens. If it were
// an issue, we could add a subsequent pass that pushes unwind dests down
// from parents that have them to children that appear to unwind to caller.
if (!UnwindDest) {
UnwindDestState = -1;
} else {
UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
}
Entry->TryParentState = UnwindDestState;
}
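// Hedged walk-through of the inference above (not from this patch): take a
// cleanuppad CL with no cleanupret whose only exceptional exit is an invoke
// funclet-bound to CL. If the invoke's unwind dest is a pad whose parent is
// CL, the unwind stays inside CL and the remaining users are scanned;
// otherwise that dest is CL's unwind dest, and TryParentState(CL) becomes
// the state recorded for its first non-PHI pad.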
// Step three: transfer information from pads to invokes.
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
@ -597,6 +718,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
for (auto &Funclets : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclets.first;
std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
Value *FuncletToken;
if (FuncletPadBB == &F.getEntryBlock())
FuncletToken = ConstantTokenNone::get(F.getContext());
else
FuncletToken = FuncletPadBB->getFirstNonPHI();
std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
ValueToValueMapTy VMap;
@ -668,15 +794,44 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
RemapInstruction(&I, VMap,
RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
// Catchrets targeting cloned blocks need to be updated separately from
// the loop above because they are not in the current funclet.
SmallVector<CatchReturnInst *, 2> FixupCatchrets;
for (auto &BBMapping : Orig2Clone) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
FixupCatchrets.clear();
for (BasicBlock *Pred : predecessors(OldBlock))
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
if (CatchRet->getParentPad() == FuncletToken)
FixupCatchrets.push_back(CatchRet);
for (CatchReturnInst *CatchRet : FixupCatchrets)
CatchRet->setSuccessor(NewBlock);
}
auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
unsigned NumPreds = PN->getNumIncomingValues();
for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
++PredIdx) {
BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
ColorVector &IncomingColors = BlockColors[IncomingBlock];
bool BlockInFunclet = IncomingColors.size() == 1 &&
IncomingColors.front() == FuncletPadBB;
if (IsForOldBlock != BlockInFunclet)
bool EdgeTargetsFunclet;
if (auto *CRI =
dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken);
} else {
ColorVector &IncomingColors = BlockColors[IncomingBlock];
assert(!IncomingColors.empty() && "Block not colored!");
assert((IncomingColors.size() == 1 ||
llvm::all_of(IncomingColors,
[&](BasicBlock *Color) {
return Color != FuncletPadBB;
})) &&
"Cloning should leave this funclet's blocks monochromatic");
EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
}
if (IsForOldBlock != EdgeTargetsFunclet)
continue;
PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
// Revisit the next entry.
@ -864,7 +1019,6 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
}
void WinEHPrepare::verifyPreparedFunclets(Function &F) {
// Recolor the CFG to verify that all is well.
for (BasicBlock &BB : F) {
size_t NumColors = BlockColors[&BB].size();
assert(NumColors == 1 && "Expected monochromatic BB!");
@ -872,12 +1026,8 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
report_fatal_error("Uncolored BB!");
if (NumColors > 1)
report_fatal_error("Multicolor BB!");
if (!DisableDemotion) {
bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
assert(!EHPadHasPHI && "EH Pad still has a PHI!");
if (EHPadHasPHI)
report_fatal_error("EH Pad still has a PHI!");
}
assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
"EH Pad still has a PHI!");
}
}
@ -896,12 +1046,17 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
demotePHIsOnFunclets(F);
if (!DisableCleanups) {
DEBUG(verifyFunction(F));
removeImplausibleInstructions(F);
DEBUG(verifyFunction(F));
cleanupPreparedFunclets(F);
}
verifyPreparedFunclets(F);
DEBUG(verifyPreparedFunclets(F));
// Recolor the CFG to verify that all is well.
DEBUG(colorFunclets(F));
DEBUG(verifyPreparedFunclets(F));
BlockColors.clear();
FuncletBlocks.clear();

View File

@ -2060,7 +2060,7 @@ class AssemblyWriter {
// printGCRelocateComment - print comment after call to the gc.relocate
// intrinsic indicating base and derived pointer names.
void printGCRelocateComment(const Value &V);
void printGCRelocateComment(const GCRelocateInst &Relocate);
};
} // namespace
@ -2722,14 +2722,11 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) {
/// printGCRelocateComment - print comment after call to the gc.relocate
/// intrinsic indicating base and derived pointer names.
void AssemblyWriter::printGCRelocateComment(const Value &V) {
assert(isGCRelocate(&V));
GCRelocateOperands GCOps(cast<Instruction>(&V));
void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
Out << " ; (";
writeOperand(GCOps.getBasePtr(), false);
writeOperand(Relocate.getBasePtr(), false);
Out << ", ";
writeOperand(GCOps.getDerivedPtr(), false);
writeOperand(Relocate.getDerivedPtr(), false);
Out << ")";
}
@ -2737,8 +2734,8 @@ void AssemblyWriter::printGCRelocateComment(const Value &V) {
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
if (isGCRelocate(&V))
printGCRelocateComment(V);
if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
printGCRelocateComment(*Relocate);
if (AnnotationWriter)
AnnotationWriter->printInfoComment(V, Out);

View File

@ -641,14 +641,15 @@ AttributeSet AttributeSet::get(LLVMContext &C,
if (Attrs.empty())
return AttributeSet();
#ifndef NDEBUG
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
"Misordered Attributes list!");
assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
"Pointless attribute!");
}
#endif
assert(std::is_sorted(Attrs.begin(), Attrs.end(),
[](const std::pair<unsigned, Attribute> &LHS,
const std::pair<unsigned, Attribute> &RHS) {
return LHS.first < RHS.first;
}) && "Misordered Attributes list!");
assert(std::none_of(Attrs.begin(), Attrs.end(),
[](const std::pair<unsigned, Attribute> &Pair) {
return Pair.second.hasAttribute(Attribute::None);
}) && "Pointless attribute!");
// Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes
// list.

View File

@ -76,22 +76,21 @@ iplist<Instruction>::iterator Instruction::eraseFromParent() {
return getParent()->getInstList().erase(getIterator());
}
/// insertBefore - Insert an unlinked instructions into a basic block
/// immediately before the specified instruction.
/// Insert an unlinked instruction into a basic block immediately before the
/// specified instruction.
void Instruction::insertBefore(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
}
/// insertAfter - Insert an unlinked instructions into a basic block
/// immediately after the specified instruction.
/// Insert an unlinked instruction into a basic block immediately after the
/// specified instruction.
void Instruction::insertAfter(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(),
this);
}
/// moveBefore - Unlink this instruction from its current basic block and
/// insert it into the basic block that MovePos lives in, right before
/// MovePos.
/// Unlink this instruction from its current basic block and insert it into the
/// basic block that MovePos lives in, right before MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
MovePos->getParent()->getInstList().splice(
MovePos->getIterator(), getParent()->getInstList(), getIterator());

View File

@ -609,20 +609,6 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
return setSuccessor(idx, B);
}
bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the invoke instruction.
if (isFnAttrDisallowedByOpBundle(A))
return false;
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
}
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
@ -934,6 +920,17 @@ void CatchSwitchInst::addHandler(BasicBlock *Handler) {
getOperandList()[OpNo] = Handler;
}
void CatchSwitchInst::removeHandler(handler_iterator HI) {
// Move all subsequent handlers up one.
Use *EndDst = op_end() - 1;
for (Use *CurDst = HI.getCurrent(); CurDst != EndDst; ++CurDst)
*CurDst = *(CurDst + 1);
// Null out the last handler use.
*EndDst = nullptr;
setNumHungOffUseOperands(getNumOperands() - 1);
}
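A minimal usage sketch for the new removeHandler (illustrative, not from this patch; CS and DeadBB are hypothetical):

  // Drop the handler that targets DeadBB; later handlers shift up one slot.
  for (auto HI = CS->handler_begin(), HE = CS->handler_end(); HI != HE; ++HI) {
    if (*HI == DeadBB) {
      CS->removeHandler(HI); // the handler list shrank, so stop iterating
      break;
    }
  }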
BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}

View File

@ -190,6 +190,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) &&
"Expected non-temp node");
assert(CanReplace &&
"Attempted to replace Metadata marked for no replacement");
if (UseMap.empty())
return;
@ -555,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
void MDNode::resolveCycles(bool MDMaterialized) {
void MDNode::resolveCycles(bool AllowTemps) {
if (isResolved())
return;
@ -568,7 +570,7 @@ void MDNode::resolveCycles(bool MDMaterialized) {
if (!N)
continue;
if (N->isTemporary() && !MDMaterialized)
if (N->isTemporary() && AllowTemps)
continue;
assert(!N->isTemporary() &&
"Expected all forward declarations to be resolved");

View File

@ -40,20 +40,7 @@ bool llvm::isStatepoint(const Value &inst) {
}
bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
if (!CS.getInstruction()) {
// This is not a call site
return false;
}
return isGCRelocate(CS.getInstruction());
}
bool llvm::isGCRelocate(const Value *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (const Function *F = call->getCalledFunction()) {
return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
}
return false;
return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
}
bool llvm::isGCResult(const ImmutableCallSite &CS) {

View File

@ -1657,14 +1657,14 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
const CallInst *Call = dyn_cast<const CallInst>(U);
Assert(Call, "illegal use of statepoint token", &CI, U);
if (!Call) continue;
Assert(isGCRelocate(Call) || isGCResult(Call),
Assert(isa<GCRelocateInst>(Call) || isGCResult(Call),
"gc.result or gc.relocate are the only value uses"
"of a gc.statepoint",
&CI, U);
if (isGCResult(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.result connected to wrong gc.statepoint", &CI, Call);
} else if (isGCRelocate(Call)) {
} else if (isa<GCRelocateInst>(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.relocate connected to wrong gc.statepoint", &CI, Call);
}
@ -3019,8 +3019,7 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
&CPI);
auto *ParentPad = CPI.getParentPad();
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CleanupPadInst has an invalid parent.", &CPI);
User *FirstUser = nullptr;
@ -3077,10 +3076,17 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
auto *ParentPad = CatchSwitch.getParentPad();
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CatchSwitchInst has an invalid parent.", ParentPad);
Assert(CatchSwitch.getNumHandlers() != 0,
"CatchSwitchInst cannot have empty handler list", &CatchSwitch);
for (BasicBlock *Handler : CatchSwitch.handlers()) {
Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()),
"CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
}
visitTerminatorInst(CatchSwitch);
}
@ -3675,8 +3681,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Verify rest of the relocate arguments
GCRelocateOperands Ops(CS);
ImmutableCallSite StatepointCS(Ops.getStatepoint());
ImmutableCallSite StatepointCS(
cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
// Both the base and derived must be piped through the safepoint
Value* Base = CS.getArgOperand(1);
@ -3731,14 +3737,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Relocated value must be a pointer type, but gc_relocate does not need to return the
// same pointer type as the relocated pointer. It can be cast to the correct type later
// if it's desired. However, they must have the same address space.
GCRelocateOperands Operands(CS);
Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
// gc_relocate return type must be a pointer type, and is verified earlier in
// VerifyIntrinsicType().
Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}

View File

@ -524,6 +524,23 @@ class IRLinker {
ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
}
~IRLinker() {
// In the case where we are not linking metadata, we unset the CanReplace
// flag on all temporary metadata in the MetadataToIDs map to ensure
// none was replaced while being a map key. Now that we are destructing
// the map, set the flag back to true, so that it is replaceable during
// metadata linking.
if (!shouldLinkMetadata()) {
for (auto MDI : MetadataToIDs) {
Metadata *MD = const_cast<Metadata *>(MDI.first);
MDNode *Node = dyn_cast<MDNode>(MD);
assert((Node && Node->isTemporary()) &&
"Found non-temp metadata in map when not linking metadata");
Node->setCanReplace(true);
}
}
}
bool run();
Value *materializeDeclFor(Value *V, bool ForAlias);
void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
@ -1111,7 +1128,8 @@ bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
// a function and before remapping metadata on instructions below
// in RemapInstruction, as the saved mapping is used to handle
// the temporary metadata hanging off instructions.
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
/* OnlyTempMD = */ true);
// Link in the prefix data.
if (Src.hasPrefixData())
@ -1514,7 +1532,8 @@ bool IRLinker::run() {
// Ensure metadata materialized
if (SrcM.getMaterializer()->materializeMetadata())
return true;
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
/* OnlyTempMD = */ false);
}
linkNamedMDNodes();

View File

@ -514,13 +514,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(1);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list,
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, context.getDwarfVersion() >= 4
? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
if (context.getGenDwarfSectionSyms().size() > 1 &&
context.getDwarfVersion() >= 3) {
EmitAbbrev(MCOS, dwarf::DW_AT_ranges,
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
EmitAbbrev(MCOS, dwarf::DW_AT_ranges, context.getDwarfVersion() >= 4
? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
} else {
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);

View File

@ -1,4 +1,4 @@
//===-- MObjectFileInfo.cpp - Object File Information ---------------------===//
//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
//
// The LLVM Compiler Infrastructure
//

View File

@ -63,31 +63,30 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
/// ToggleFeature - Toggle a feature and return the re-computed feature
/// bits. This version will also change all implied bits.
FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
SubtargetFeatures Features;
FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
SubtargetFeatures Features;
FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
size_t NumProcs = ProcDesc.size();
assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs,
ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
[](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
return strcmp(LHS.Key, RHS.Key) < 0;
}) &&
"Processor machine model table is not sorted");
// Find entry
const SubtargetInfoKV *Found =
std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
auto Found =
std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
if (CPU != "help") // Don't error if the user asked for help.
errs() << "'" << CPU
<< "' is not a recognized processor for this target"

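The rewrite replaces raw pointer arithmetic with an ArrayRef and standard algorithms; the table must stay sorted by key for std::lower_bound to be valid. A self-contained sketch of the same sorted-table lookup (KV is an illustrative stand-in for SubtargetInfoKV):

#include <algorithm>
#include <cstring>

struct KV { const char *Key; int Value; };

// lower_bound needs an "element < key" predicate consistent with the sort order.
static bool keyLess(const KV &E, const char *K) { return std::strcmp(E.Key, K) < 0; }

const KV *lookup(const KV *Begin, const KV *End, const char *K) {
  const KV *Found = std::lower_bound(Begin, End, K, keyLess);
  return (Found != End && std::strcmp(Found->Key, K) == 0) ? Found : nullptr;
}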
View File

@ -160,10 +160,9 @@ void ClearImpliedBits(FeatureBitset &Bits,
}
}
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
FeatureBitset
SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
/// ToggleFeature - Toggle a feature and update the feature bits.
void
SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
// Find feature in table.
@ -186,12 +185,9 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
return Bits;
}
FeatureBitset
SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
assert(hasFlag(Feature));
@ -203,7 +199,7 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
if (FeatureEntry) {
// Enable/disable feature in bits
if (isEnabled(Feature)) {
Bits |= FeatureEntry->Value;
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
SetImpliedBits(Bits, FeatureEntry, FeatureTable);
@ -218,8 +214,6 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
return Bits;
}
@ -234,14 +228,10 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
return FeatureBitset();
#ifndef NDEBUG
for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
"CPU table is not sorted");
}
for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
"CPU features table is not sorted");
}
assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
"CPU table is not sorted");
assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
"CPU features table is not sorted");
#endif
// Resulting bits
FeatureBitset Bits;
@ -277,7 +267,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
if (Feature == "+help")
Help(CPUTable, FeatureTable);
Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable);
ApplyFeatureFlag(Bits, Feature, FeatureTable);
}
return Bits;
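Collapsing the hand-rolled sortedness loops into std::is_sorted needs only an operator< on the table entries. A minimal, self-contained equivalent of the debug-only check:

#include <algorithm>
#include <cassert>
#include <cstring>

struct FeatureKV {
  const char *Key;
  bool operator<(const FeatureKV &Other) const {
    return std::strcmp(Key, Other.Key) < 0;
  }
};

void checkSorted(const FeatureKV *Begin, const FeatureKV *End) {
  // Matches the NDEBUG-guarded asserts above: the tables must be sorted by Key.
  assert(std::is_sorted(Begin, End) && "feature table is not sorted");
  (void)Begin; (void)End; // no-op in release builds
}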

View File

@ -316,12 +316,17 @@ static std::error_code readCoverageMappingData(
// Read the records in the coverage data section.
for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
if (Buf + 4 * sizeof(uint32_t) > End)
if (Buf + sizeof(CovMapHeader) > End)
return coveragemap_error::malformed;
uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
uint32_t NRecords =
endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
uint32_t FilenamesSize =
endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
uint32_t CoverageSize =
endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
Buf = reinterpret_cast<const char *>(++CovHeader);
switch (Version) {
case CoverageMappingVersion1:

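Reading the header through a struct pointer plus endian::byte_swap keeps the field list in one place and makes the bounds check track the real header size rather than a hard-coded 4 * sizeof(uint32_t). A standalone sketch of the pattern (CovMapHeaderSketch is a stand-in for coverage::CovMapHeader; the byte swap is elided for a host-endian buffer):

#include <cstdint>
#include <cstring>

struct CovMapHeaderSketch {
  uint32_t NRecords;
  uint32_t FilenamesSize;
  uint32_t CoverageSize;
  uint32_t Version;
};

// Returns false on a truncated (malformed) header; advances Buf past it.
bool readHeader(const char *&Buf, const char *End, CovMapHeaderSketch &Out) {
  if (Buf + sizeof(CovMapHeaderSketch) > End)
    return false;
  std::memcpy(&Out, Buf, sizeof(Out)); // byte_swap each field here if needed
  Buf += sizeof(CovMapHeaderSketch);
  return true;
}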
View File

@ -12,12 +12,15 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@ -162,6 +165,98 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result) {
uint8_t Header[16], *P = Header;
std::string UncompressedNameStrings =
join(NameStrs.begin(), NameStrs.end(), StringRef(" "));
unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
P += EncLen;
auto WriteStringToResult = [&](size_t CompressedLen,
const std::string &InputStr) {
EncLen = encodeULEB128(CompressedLen, P);
P += EncLen;
char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
unsigned HeaderLen = P - &Header[0];
Result.append(HeaderStr, HeaderLen);
Result += InputStr;
return 0;
};
if (!doCompression)
return WriteStringToResult(0, UncompressedNameStrings);
SmallVector<char, 128> CompressedNameStrings;
zlib::Status Success =
zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
zlib::BestSizeCompression);
if (Success != zlib::StatusOK)
return 1;
return WriteStringToResult(
CompressedNameStrings.size(),
std::string(CompressedNameStrings.data(), CompressedNameStrings.size()));
}
StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) {
auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
StringRef NameStr =
Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
return NameStr;
}
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result) {
std::vector<std::string> NameStrs;
for (auto *NameVar : NameVars) {
NameStrs.push_back(getPGOFuncNameInitializer(NameVar));
}
return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
}
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
NameStrings.size());
while (P < EndP) {
uint32_t N;
uint64_t UncompressedSize = decodeULEB128(P, &N);
P += N;
uint64_t CompressedSize = decodeULEB128(P, &N);
P += N;
bool isCompressed = (CompressedSize != 0);
SmallString<128> UncompressedNameStrings;
StringRef NameStrings;
if (isCompressed) {
StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
CompressedSize);
if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
UncompressedSize) != zlib::StatusOK)
return 1;
P += CompressedSize;
NameStrings = StringRef(UncompressedNameStrings.data(),
UncompressedNameStrings.size());
} else {
NameStrings =
StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
P += UncompressedSize;
}
// Now parse the name strings.
SmallVector<StringRef, 0> Names;
NameStrings.split(Names, ' ');
for (StringRef &Name : Names)
Symtab.addFuncName(Name);
while (P < EndP && *P == 0)
P++;
}
Symtab.finalizeSymtab();
return 0;
}
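Each block in the name-strings payload is therefore two ULEB128 lengths followed by the space-separated names, with a compressed size of zero marking an uncompressed block. A minimal encoder for the uncompressed case, with a hand-rolled ULEB128 append standing in for llvm::encodeULEB128:

#include <cstdint>
#include <string>

static void appendULEB128(std::string &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(static_cast<char>(Byte));
  } while (V);
}

// Layout: <uncompressed size><compressed size = 0><payload>.
std::string encodeUncompressedNames(const std::string &SpaceSeparatedNames) {
  std::string Result;
  appendULEB128(Result, SpaceSeparatedNames.size());
  appendULEB128(Result, 0);
  Result += SpaceSeparatedNames;
  return Result;
}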
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {

View File

@ -446,7 +446,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
static long ArgMax = sysconf(_SC_ARG_MAX);
// System says no practical limit.
@ -456,7 +456,7 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
// Conservatively account for space required by environment variables.
long HalfArgMax = ArgMax / 2;
size_t ArgLength = 0;
size_t ArgLength = Program.size() + 1;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
ArgLength += strlen(*I) + 1;

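The renamed check now charges the program path itself against the budget: every string costs its length plus one byte for a separator or terminator, and the total must fit in half of ARG_MAX to leave room for the environment. A distilled, self-contained version of the Unix-side logic (argMax stands in for the cached sysconf(_SC_ARG_MAX) value):

#include <cstring>
#include <vector>

bool fitsWithinLimits(const char *Program,
                      const std::vector<const char *> &Args, long argMax) {
  if (argMax <= 0)
    return true; // system reports no practical limit
  long Half = argMax / 2; // conservatively reserve half for the environment
  size_t Len = std::strlen(Program) + 1;
  for (const char *A : Args)
    Len += std::strlen(A) + 1;
  return Len <= static_cast<size_t>(Half);
}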
View File

@ -535,14 +535,15 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
// The documented max length of the command line passed to CreateProcess.
static const size_t MaxCommandStringLength = 32768;
size_t ArgLength = 0;
// Account for the trailing space for the program path and the
// trailing NULL of the last argument.
size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
// Account for the trailing space for every arg but the last one and the
// trailing NULL of the last argument.
// Account for the trailing space for every arg
ArgLength += ArgLenWithQuotes(*I) + 1;
if (ArgLength > MaxCommandStringLength) {
return false;

View File

@ -30,6 +30,9 @@
#define _WIN32_WINNT 0x0601
#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@ -44,6 +47,21 @@
#include <string>
#include <vector>
#if !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <VersionHelpers.h>
#else
// Cygwin does not have the IsWindows8OrGreater() API.
// Some versions of MinGW do not have the API either.
inline bool IsWindows8OrGreater() {
OSVERSIONINFO osvi = {};
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
if (!::GetVersionEx(&osvi))
return false;
return (osvi.dwMajorVersion > 6 ||
(osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
}
#endif // __CYGWIN__
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
return true;

View File

@ -57,6 +57,10 @@
#endif
#endif
#ifdef LLVM_ON_WIN32
#include "Windows/WindowsSupport.h"
#endif
using namespace llvm;
raw_ostream::~raw_ostream() {
@ -567,8 +571,21 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
#ifndef LLVM_ON_WIN32
bool ShouldWriteInChunks = false;
#else
// Writing a large amount of output to the Windows console fails with ENOMEM.
// It seems that, prior to Windows 8, WriteFile() redirects to WriteConsole(),
// and the latter has a size limit (66000 bytes or less, depending on heap
// usage).
bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
#endif
do {
ssize_t ret = ::write(FD, Ptr, Size);
size_t ChunkSize = Size;
if (ChunkSize > 32767 && ShouldWriteInChunks)
ChunkSize = 32767;
ssize_t ret = ::write(FD, Ptr, ChunkSize);
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.

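The chunking only engages when writing to a console on pre-Windows-8 systems, where the WriteConsole path rejects large buffers; everywhere else each attempt stays a single full-size write. A standalone POSIX-flavored sketch of the loop shape (retry logic for recoverable errors is elided):

#include <cstddef>
#include <unistd.h>

void writeChunked(int FD, const char *Ptr, size_t Size, bool InChunks) {
  while (Size > 0) {
    size_t Chunk = (InChunks && Size > 32767) ? 32767 : Size;
    ssize_t Ret = ::write(FD, Ptr, Chunk);
    if (Ret < 0)
      return; // the real code distinguishes recoverable errors and retries
    Ptr += Ret;
    Size -= static_cast<size_t>(Ret);
  }
}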
View File

@ -722,7 +722,7 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string UnOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
case HEAD: Result = "!head"; break;
case TAIL: Result = "!tail"; break;
@ -850,7 +850,7 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string BinOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
case AND: Result = "!and"; break;
@ -1054,7 +1054,7 @@ Init *TernOpInit::resolveReferences(Record &R,
const RecordVal *RV) const {
Init *lhs = LHS->resolveReferences(R, RV);
if (Opc == IF && lhs != LHS) {
if (getOpcode() == IF && lhs != LHS) {
IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
Value = dyn_cast<IntInit>(I);
@ -1082,7 +1082,7 @@ Init *TernOpInit::resolveReferences(Record &R,
std::string TernOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case SUBST: Result = "!subst"; break;
case FOREACH: Result = "!foreach"; break;
case IF: Result = "!if"; break;

View File

@ -77,7 +77,8 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V) {
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment) {
if (!V) return false;
if (!CurRec) CurRec = &CurMultiClass->Rec;
@ -91,8 +92,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// in the resolution machinery.
if (BitList.empty())
if (VarInit *VI = dyn_cast<VarInit>(V))
if (VI->getNameInit() == ValName)
return false;
if (VI->getNameInit() == ValName && !AllowSelfAssignment)
return true;
// If we are assigning to a subset of the bits in the value... then we must be
// assigning to a field of BitsRecTy, which must have a BitsInit
@ -165,7 +166,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (i < SubClass.TemplateArgs.size()) {
// If a value is specified for this template arg, set it now.
if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
std::vector<unsigned>(), SubClass.TemplateArgs[i]))
None, SubClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -243,8 +244,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// If a value is specified for this template arg, set it in the
// superclass now.
if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -258,8 +258,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
for (const auto &Def :
makeArrayRef(CurMC->DefPrototypes).slice(newDefStart)) {
if (SetValue(Def.get(), SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -332,8 +331,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
if (SetValue(IterRec.get(), Loc, IterVar->getName(),
std::vector<unsigned>(), IVal))
if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal))
return Error(Loc, "when instantiating this def");
// Resolve it next.
@ -1728,7 +1726,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
SMLoc ValLoc = Lex.getLoc();
Init *Val = ParseValue(CurRec, Type);
if (!Val ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
SetValue(CurRec, ValLoc, DeclName, None, Val))
// Return the name, even if an error is thrown. This is so that we can
// continue to make some progress, even without the value having been
// initialized.
@ -2358,8 +2356,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
// Set the value for NAME. We don't resolve references to it 'til later,
// though, so that uses in nested multiclass names don't get
// confused.
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME",
std::vector<unsigned>(), DefmPrefix)) {
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix,
/*AllowSelfAssignment*/true)) {
Error(DefmPrefixRange.Start, "Could not resolve " +
CurRec->getNameInitAsString() + ":NAME to '" +
DefmPrefix->getAsUnquotedString() + "'");
@ -2446,8 +2444,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
// Check if a value is specified for this temp-arg.
if (i < TemplateVals.size()) {
// Set it now.
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
TemplateVals[i]))
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], None, TemplateVals[i]))
return true;
// Resolve it next.

View File

@ -105,10 +105,13 @@ class TGParser {
private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V);
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment = false);
bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
const std::vector<unsigned> &BitList, Init *V) {
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment = false) {
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V,
AllowSelfAssignment);
}
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,

View File

@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
FeatureCRC,
FeaturePerfMon]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC,
@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
//===----------------------------------------------------------------------===//
// Assembly parser

View File

@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
"AArch64 A57 FP Load-Balancing", false, false)
namespace {
/// A Chain is a sequence of instructions that are linked together by
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
/// fmul d0<def>, ?
@ -285,7 +285,7 @@ class Chain {
std::string str() const {
std::string S;
raw_string_ostream OS(S);
OS << "{";
StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
return Ch;
}
}
// Bailout case - just return the first item.
Chain *Ch = L.front();
L.erase(L.begin());
@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
RS.enterBasicBlock(&MBB);
RS.forward(MachineBasicBlock::iterator(G->getStart()));
// Can we find an appropriate register that is available throughout the life
// Can we find an appropriate register that is available throughout the life
// of the chain?
unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));

View File

@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
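To make the bit arithmetic concrete: testing bit 3 of (shl x, 2) is testing bit 1 of x, testing bit 1 of (srl x, 2) is testing bit 3 of x, and an SRA that pushes the request past the top bit clamps to the sign bit. A tiny integer-only model of the three shift rules above (64-bit width assumed):

#include <cassert>

unsigned throughShl(unsigned Bit, unsigned C) {
  assert(C <= Bit && "guarded by the SHL case above");
  return Bit - C;
}
unsigned throughSrl(unsigned Bit, unsigned C) {
  assert(Bit + C < 64 && "guarded by the SRL case above");
  return Bit + C;
}
unsigned throughSra(unsigned Bit, unsigned C) {
  // sra replicates the sign bit, so out-of-range requests clamp to the MSB.
  return (Bit + C < 64) ? Bit + C : 63;
}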
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:

View File

@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
(Op1->memoperands_end() - Op1->memoperands_begin());
MachineFunction *MF = MI->getParent()->getParent();
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
MachineSDNode::mmo_iterator MemEnd =
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
MI->setMemRefs(MemBegin, MemEnd);
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(RtNewDest))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
// Copy MachineMemOperands from the original loads.
concatenateMemOperands(NewMemMI, I, Paired);
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
DEBUG(
dbgs()
@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(I))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
// Copy MachineMemOperands from the original stores.
concatenateMemOperands(MIB, I, Paired);
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
} else {
// Handle Unscaled
if (IsUnscaled)

View File

@ -33,7 +33,14 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
enum ARMProcFamilyEnum {
Others,
CortexA35,
CortexA53,
CortexA57,
Cyclone,
ExynosM1
};
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
@ -143,6 +150,7 @@ class AArch64Subtarget : public AArch64GenSubtargetInfo {
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
bool isExynosM1() const { return CPUString == "exynos-m1"; }
bool useAA() const override { return isCortexA53(); }

View File

@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
};
uint32_t
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name,
}
std::string
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {

View File

@ -285,17 +285,17 @@ struct AArch64NamedImmMapper {
// Zero value of FeatureBitSet means the mapping is always available
FeatureBitset FeatureBitSet;
bool isNameEqual(std::string Other,
bool isNameEqual(std::string Other,
const FeatureBitset& FeatureBits) const {
if (FeatureBitSet.any() &&
if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Name == Other;
}
bool isValueEqual(uint32_t Other,
bool isValueEqual(uint32_t Other,
const FeatureBitset& FeatureBits) const {
if (FeatureBitSet.any() &&
if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Value == Other;
@ -310,7 +310,7 @@ struct AArch64NamedImmMapper {
StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
bool &Valid) const;
// Maps string to value, depending on availability for FeatureBits given
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
@ -1322,7 +1322,7 @@ namespace AArch64TLBI {
return true;
}
}
}
}
namespace AArch64II {
/// Target Operand Flag enum.

View File

@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"true",
"Support flat address space">;
def FeatureXNACK : SubtargetFeature<"xnack",
"EnableXNACK",
"true",
"Enable XNACK support">;
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"EnableVGPRSpilling",
"true",

View File

@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
if (VCCUsed || FlatUsed)
if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
MaxSGPR += 2;
if (FlatUsed) {
MaxSGPR += 2;
// 2 additional for VI+.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
if (FlatUsed)
MaxSGPR += 2;
if (STM.isXNACKEnabled())
MaxSGPR += 2;
}
@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;

View File

@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
(truncstorei8 node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
(truncstorei16 node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def flat_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;

View File

@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
EnableXNACK(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),

View File

@ -76,6 +76,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableXNACK;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
@ -290,6 +291,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
bool isXNACKEnabled() const {
return EnableXNACK;
}
unsigned getMaxWavesPerCU() const {
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return 10;

View File

@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
let Predicates = [HasFlatAddressSpace] in {
class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
PatFrag flat_ld> :
Pat <(vt (flat_ld i64:$ptr)),
(Instr_ADDR64 $ptr, 0, 0, 0)
>;
def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
Pat <(st vt:$value, i64:$ptr),
(Instr $value, $ptr, 0, 0, 0)
>;
def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
} // End HasFlatAddressSpace predicate
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
@ -320,20 +284,10 @@ def : Pat <
//===----------------------------------------------------------------------===//
// Patterns to generate flat for global
// Flat Patterns
//===----------------------------------------------------------------------===//
def useFlatForGlobal : Predicate <
"Subtarget->useFlatForGlobal() || "
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
let Predicates = [useFlatForGlobal] in {
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
let Predicates = [isCIVI] in {
// Patterns for global loads with no offset
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(inst $addr, 0, 0, 0)
>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $data, $addr, 0, 0, 0)
>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr, vt:$data)),
@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
} // End Predicates = [useFlatForGlobal]
} // End Predicates = [isCIVI]

View File

@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
// We reserved the last registers for this. Shift it down to the end of those
// which were actually used.
//
// FIXME: It might be safer to use a pseudoregister before replacement.
if (!ST.hasSGPRInitBug()) {
// We reserved the last registers for this. Shift it down to the end of those
// which were actually used.
//
// FIXME: It might be safer to use a pseudoregister before replacement.
// FIXME: We should be able to eliminate unused input registers. We only
// cannot do this for the resources required for scratch access. For now we
// skip over user SGPRs and may leave unused holes.
// FIXME: We should be able to eliminate unused input registers. We only
// cannot do this for the resources required for scratch access. For now we
// skip over user SGPRs and may leave unused holes.
// We find the resource first because it has an alignment requirement.
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// We find the resource first because it has an alignment requirement.
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
ScratchRsrcReg = Reg;
MFI->setScratchRSrcReg(ScratchRsrcReg);
break;
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
ScratchRsrcReg = Reg;
MFI->setScratchRSrcReg(ScratchRsrcReg);
break;
}
}
}
}
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven't added its uses yet.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven't added its uses yet.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
ScratchWaveOffsetReg = Reg;
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
break;
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
ScratchWaveOffsetReg = Reg;
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
break;
}
}
}
}

View File

@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode<
SDTCisVT<0, i64>]>
>;
//===----------------------------------------------------------------------===//
// PatFrags for FLAT instructions
//===----------------------------------------------------------------------===//
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);
}]>;
def flat_load : flat_ld <load>;
def flat_az_extloadi8 : flat_ld <az_extloadi8>;
def flat_sextloadi8 : flat_ld <sextloadi8>;
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
def flat_sextloadi16 : flat_ld <sextloadi16>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def flat_store: flat_st <store>;
def flat_truncstorei8 : flat_st <truncstorei8>;
def flat_truncstorei16 : flat_st <truncstorei16>;
def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);

View File

@ -59,8 +59,6 @@ defm EXP : EXP_m;
// SMRD Instructions
//===----------------------------------------------------------------------===//
let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",

View File

@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
if (LaneVGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
// When compiling from inside Mesa, the compilation continues.
// Select an arbitrary register to avoid triggering assertions
// during subsequent passes.
LaneVGPR = AMDGPU::VGPR0;
}
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
// Add this register as live-in to all blocks to avoid machine verifier

View File

@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
if (ST.isXNACKEnabled())
BaseIdx -= 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
// next sgpr128 down.
// 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
// 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
// either way.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
unsigned Idx;
if (!ST.isXNACKEnabled())
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
else
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Next register before reservations for flat_scr and vcc.
return AMDGPU::SGPR97;
if (!ST.isXNACKEnabled()) {
// Next register before reservations for flat_scr and vcc.
return AMDGPU::SGPR97;
} else {
// Next register before reservations for flat_scr, xnack_mask, vcc,
// and scratch resource.
return AMDGPU::SGPR91;
}
}
return AMDGPU::SGPR95;
@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// for VCC/FLAT_SCR.
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
if (ST.isXNACKEnabled())
reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
if (ST.isXNACKEnabled())
Limit -= 2;
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
if (Spill.VGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
if (Spill.VGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)

View File

@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
let Predicates = [isVI] in {
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
} // End Predicates = [isVI]

View File

@ -252,6 +252,8 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors", []>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@ -649,6 +651,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureCrypto,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
FeatureHWDiv,
FeatureHWDivARM,
FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
//===----------------------------------------------------------------------===//
// Register File Description

View File

@ -340,12 +340,12 @@ namespace {
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = &*MBBI;
unsigned MBBId = MBB->getNumber();
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
assert(std::is_sorted(MF->begin(), MF->end(),
[this](const MachineBasicBlock &LHS,
const MachineBasicBlock &RHS) {
return BBInfo[LHS.getNumber()].postOffset() <
BBInfo[RHS.getNumber()].postOffset();
}));
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];

View File

@ -1986,23 +1986,6 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ (Op1->memoperands_end() - Op1->memoperands_begin());
MachineFunction *MF = MI->getParent()->getParent();
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
MachineSDNode::mmo_iterator MemEnd =
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
MemEnd =
std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl, unsigned &NewOpc,
@ -2196,7 +2179,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
concatenateMemOperands(MIB, Op0, Op1);
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@ -2210,7 +2193,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
concatenateMemOperands(MIB, Op0, Op1);
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}

View File

@ -44,7 +44,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
CortexA57, CortexA72, Krait, Swift
CortexA57, CortexA72, Krait, Swift, ExynosM1
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass

View File

@ -251,6 +251,10 @@ def : Proc<"hexagonv60", HexagonModelV60,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
def HexagonAsmParser : AsmParser {
bit HasMnemonicFirst = 0;
}
def HexagonAsmParserVariant : AsmParserVariant {
int Variant = 0;
string TokenizingCharacters = "#()=:.<>!+*";
@ -259,5 +263,6 @@ def HexagonAsmParserVariant : AsmParserVariant {
def Hexagon : Target {
// Pull in Instruction Info:
let InstructionSet = HexagonInstrInfo;
let AssemblyParsers = [HexagonAsmParser];
let AssemblyParserVariants = [HexagonAsmParserVariant];
}

View File

@ -5807,3 +5807,5 @@ include "HexagonInstrInfoV60.td"
include "HexagonInstrInfoVector.td"
include "HexagonInstrAlias.td"
include "HexagonSystemInst.td"

View File

@ -0,0 +1,113 @@
//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Cache manipulation instructions.
//===----------------------------------------------------------------------===//
let mayStore = 1 in
class ST_MISC_CACHEOP<dag outs, dag ins,
string asmstr, list<dag> pattern = [],
bits<3> amode, bits<3> type, bits<1> un>
: ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> {
bits<5> Rs;
bits<5> Rt;
bits<5> Rd;
let Inst{31-28} = 0b1010;
let Inst{27-25} = amode;
let Inst{24-22} = type;
let Inst{21} = un;
let Inst{20-16} = Rs;
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
let mayStore = 1 in
class ST_MISC_CACHEOP_SYS<dag outs, dag ins,
string asmstr, list<dag> pattern = [],
bits<3> amode, bits<3> type, bits<1> un>
: SYSInst<outs, ins, asmstr, pattern, ""> {
bits<5> Rs;
bits<5> Rt;
bits<5> Rd;
let Inst{31-28} = 0b1010;
let Inst{27-25} = amode;
let Inst{24-22} = type;
let Inst{21} = un;
let Inst{20-16} = Rs;
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in {
def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins),
"syncht" , [], 0b100, 0b001, 0b0>;
}
let Rt = 0, Rd = 0 in {
let isSoloAin1 = 1 in {
def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dccleana($Rs)", [], 0b000, 0b000, 0b0>;
def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dcinva($Rs)", [], 0b000, 0b000, 0b1>;
def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dccleaninva($Rs)", [], 0b000, 0b001, 0b0>;
}
}
let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in {
def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt),
"l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>;
def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt),
"l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>;
}
let hasSideEffects = 0, isSolo = 1 in
class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp>
: JRInst <
(outs), (ins IntRegs:$Rs),
#mnemonic#"($Rs)" > {
bits<5> Rs;
let IClass = 0b0101;
let Inst{27-21} = 0b0110110;
let Inst{20-16} = Rs;
let Inst{13-12} = 0b00;
let Inst{11} = MajOp;
}
// Instruction cache invalidate
def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>;
// Zero an aligned 32-byte cacheline.
let isSoloAin1 = 1 in
def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs),
"dczeroa($Rs)"> {
bits<5> Rs;
let IClass = 0b1010;
let Inst{27-21} = 0b0000110;
let Inst{13} = 0b0;
let Inst{20-16} = Rs;
}
// Memory synchronization.
let hasSideEffects = 0, isSolo = 1 in
def Y2_isync: JRInst <(outs), (ins),
"isync"> {
let IClass = 0b0101;
let Inst{27-16} = 0b011111000000;
let Inst{13} = 0b0;
let Inst{9-0} = 0b0000000010;
}

View File

@ -5,6 +5,23 @@
pr38151.c
va-arg-22.c
# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"'
20030313-1.c
20030916-1.c
20031012-1.c
20041126-1.c
20060420-1.c
20071202-1.c
20120808-1.c
pr20527-1.c
pr27073.c
pr36339.c
pr37573.c
pr43236.c
pr43835.c
pr45070.c
pr51933.c
# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
struct-ret-1.c
va-arg-11.c

View File

@ -32,7 +32,6 @@ static unsigned getVectorRegSize(unsigned RegNo) {
return 64;
llvm_unreachable("Unknown vector reg!");
return 0;
}
static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,

View File

@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
#include "llvm/IR/Constants.h"
#include "llvm/CodeGen/MachineValueType.h"
//===----------------------------------------------------------------------===//
@ -296,54 +295,6 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
}
}
void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
// It is not an error for the PSHUFB mask to not be a vector of i8 because the
// constant pool uniques constants by their bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
#ifndef NDEBUG
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
#endif
// This is a straightforward byte vector.
if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
int NumElements = MaskTy->getVectorNumElements();
ShuffleMask.reserve(NumElements);
for (int i = 0; i < NumElements; ++i) {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
int Base = i & ~0xf;
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
return;
} else if (isa<UndefValue>(COp)) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
// If the high bit (7) of the byte is set, the element is zeroed.
if (Element & (1 << 7))
ShuffleMask.push_back(SM_SentinelZero);
else {
// Only the least significant 4 bits of the byte are used.
int Index = Base + (Element & 0xf);
ShuffleMask.push_back(Index);
}
}
}
// TODO: Handle funny-looking vectors too.
}
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
@ -388,68 +339,6 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
// It is not an error for the PSHUFB mask to not be a vector of i8 because the
// constant pool uniques constants by their bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
return;
// Only support vector types.
if (!MaskTy->isVectorTy())
return;
// Make sure it's an integer type.
Type *VecEltTy = MaskTy->getVectorElementType();
if (!VecEltTy->isIntegerTy())
return;
// Support any element type from byte up to element size.
// This is necessary primarily because 64-bit elements get split to 32-bit
// in the constant pool on 32-bit targets.
unsigned EltTySize = VecEltTy->getIntegerBitWidth();
if (EltTySize < 8 || EltTySize > ElSize)
return;
unsigned NumElements = MaskTySize / ElSize;
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
"Unexpected number of vector elements.");
ShuffleMask.reserve(NumElements);
unsigned NumElementsPerLane = 128 / ElSize;
unsigned Factor = ElSize / EltTySize;
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i * Factor);
if (!COp) {
ShuffleMask.clear();
return;
} else if (isa<UndefValue>(COp)) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
int Index = i & ~(NumElementsPerLane - 1);
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
if (ElSize == 64)
Index += (Element >> 1) & 0x1;
else
Index += Element & 0x3;
ShuffleMask.push_back(Index);
}
// TODO: Handle funny-looking vectors too.
}
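The index arithmetic above keeps every output inside its 128-bit lane; a standalone sketch of just that computation (illustrative names):

#include <cstdint>
// VPERMILP index math from the loop above: 64-bit elements use bit 1
// of the control, 32-bit elements use bits 0-1, and the lane base
// (a multiple of 128/ElSize) is preserved.
int vpermilpIndex(unsigned i, unsigned ElSize, uint64_t Element) {
  unsigned NumElementsPerLane = 128 / ElSize;
  int Index = i & ~(NumElementsPerLane - 1);  // 128-bit lane base
  if (ElSize == 64)
    Index += (Element >> 1) & 0x1;
  else
    Index += Element & 0x3;
  return Index;
}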
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
unsigned NumDstElts = DstVT.getVectorNumElements();
unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
@@ -572,58 +461,4 @@ void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
}
}
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
if (MaskTy->isVectorTy()) {
unsigned NumElements = MaskTy->getVectorNumElements();
if (NumElements == VT.getVectorNumElements()) {
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
ShuffleMask.clear();
return;
}
if (isa<UndefValue>(COp))
ShuffleMask.push_back(SM_SentinelUndef);
else {
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
Element &= (1 << NumElements) - 1;
ShuffleMask.push_back(Element);
}
}
}
return;
}
// Scalar value; just broadcast it
if (!isa<ConstantInt>(C))
return;
uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
int NumElements = VT.getVectorNumElements();
Element &= (1 << NumElements) - 1;
for (int i = 0; i < NumElements; ++i)
ShuffleMask.push_back(Element);
}
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned NumElements = MaskTy->getVectorNumElements();
if (NumElements == VT.getVectorNumElements()) {
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
return;
}
if (isa<UndefValue>(COp))
ShuffleMask.push_back(SM_SentinelUndef);
else {
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
Element &= (1 << NumElements*2) - 1;
ShuffleMask.push_back(Element);
}
}
}
}
} // llvm namespace


@@ -23,7 +23,6 @@
//===----------------------------------------------------------------------===//
namespace llvm {
class Constant;
class MVT;
enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
@@ -72,9 +71,6 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// different datatypes and vector widths.
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from an IR-level vector constant.
void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
@@ -95,10 +91,6 @@ void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
SmallVectorImpl<int> &ShuffleMask);
@@ -118,18 +110,10 @@ void DecodeEXTRQIMask(int Len, int Idx,
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);


@@ -1098,9 +1098,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();


@@ -78,27 +78,6 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
/// usesTheStack - This function checks if any of the users of EFLAGS
/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Conservatively assume that inline assembly might use the stack.
if (MF.hasInlineAsm())
return true;
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
static bool doesStackUseImplyFP(const MachineFunction &MF) {
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
return IsWin64Prologue && usesTheStack(MF);
}
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -112,8 +91,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
MFI->hasStackMap() || MFI->hasPatchPoint() ||
doesStackUseImplyFP(MF));
MFI->hasStackMap() || MFI->hasPatchPoint());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -965,11 +943,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
!TRI->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64CC && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64CC && // Win64 has no Red Zone
!MFI->hasOpaqueSPAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);


@@ -157,13 +157,9 @@ namespace {
/// performance.
bool OptForSize;
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
OptForMinSize(false) {}
: SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -535,10 +531,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptFor[Min]Size are used in pattern predicates that isel is matching.
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();
OptForMinSize = MF->getFunction()->optForMinSize();
assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {


@@ -18,6 +18,7 @@
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -4556,6 +4557,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
Result = DAG.getBitcast(CastVT, Result);
Vec256 = DAG.getBitcast(CastVT, Vec256);
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
return DAG.getBitcast(ResultVT, Vec256);
@@ -4851,8 +4853,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
if (Mask.empty())
return false;
break;
}
@@ -4870,7 +4870,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
// Mask only contains negative index if an element is zero.
if (std::any_of(Mask.begin(), Mask.end(),
[](int M){ return M == SM_SentinelZero; }))
@@ -4948,8 +4947,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMVMask(C, VT, Mask);
if (Mask.empty())
return false;
break;
}
return false;
@@ -5000,8 +4997,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMV3Mask(C, VT, Mask);
if (Mask.empty())
return false;
break;
}
return false;
@@ -5009,6 +5004,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
default: llvm_unreachable("unknown target shuffle node");
}
// Empty mask indicates the decode failed.
if (Mask.empty())
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -17372,6 +17371,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (!IntrData) {
if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
return MarkEHRegistrationNode(Op, DAG);
if (IntNo == llvm::Intrinsic::x86_flags_read_u32 ||
IntNo == llvm::Intrinsic::x86_flags_read_u64 ||
IntNo == llvm::Intrinsic::x86_flags_write_u32 ||
IntNo == llvm::Intrinsic::x86_flags_write_u64) {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setHasOpaqueSPAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
// during ExpandISelPseudos in EmitInstrWithCustomInserter.
return SDValue();
}
return SDValue();
}
@@ -21144,6 +21155,47 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
// insert input VAL into EAX
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI->getOperand(0).getReg());
// insert zero to ECX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
.addReg(X86::ECX)
.addReg(X86::ECX);
// insert zero to EDX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::EDX)
.addReg(X86::EDX)
.addReg(X86::EDX);
// insert WRPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
// insert zero to ECX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
.addReg(X86::ECX)
.addReg(X86::ECX);
// insert RDPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::EAX);
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
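User code reaches these pseudo expansions through the PKU intrinsics; a small usage sketch (assumes a target built with PKU support; the zeroed ECX/EDX built above are the fixed operands RDPKRU/WRPKRU require):

#include <immintrin.h>
// Deny all access for protection key 1: read PKRU, set its AD/WD
// bits, and write it back. _rdpkru_u32 and _wrpkru lower to the
// RDPKRU/WRPKRU pseudos expanded above.
unsigned denyKey1(void) {
  unsigned PKRU = _rdpkru_u32();
  PKRU |= 0x3u << (2 * 1);  // AD|WD bits for key 1
  _wrpkru(PKRU);
  return PKRU;
}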
static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
@@ -22495,6 +22547,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
unsigned PushF =
MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop =
MI->getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
BuildMI(*BB, MI, DL, TII->get(PushF));
BuildMI(*BB, MI, DL, TII->get(Pop), MI->getOperand(0).getReg());
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
unsigned Push =
MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
MI->getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI->getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(PopF));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::RELEASE_FADD32mr:
case X86::RELEASE_FADD64mr:
return EmitLoweredAtomicFP(MI, BB);
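The RDFLAGS/WRFLAGS cases above implement the llvm.x86.flags.read/write intrinsics; a hedged usage sketch via the compiler builtins (assuming a 64-bit target where clang exposes __readeflags/__writeeflags):

#include <x86intrin.h>
// Flip the direction flag (EFLAGS bit 10) and restore it. The
// builtins lower to the RDFLAGS/WRFLAGS pseudos, which the custom
// inserter above turns into PUSHF+POP and PUSH+POPF sequences.
void toggleAndRestoreDF(void) {
  unsigned long long Flags = __readeflags();
  __writeeflags(Flags ^ (1ull << 10));  // toggle DF
  __writeeflags(Flags);                 // restore the saved EFLAGS
}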
@@ -22611,7 +22693,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB, Subtarget);
// PKU feature
case X86::WRPKRU:
return EmitWRPKRU(MI, BB, Subtarget);
case X86::RDPKRU:
return EmitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
@@ -23480,6 +23566,31 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
case X86ISD::BLENDI: {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
"Unexpected input vector types");
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
default:
return SDValue();
}
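The BLENDI swap above is value-preserving because bit i of the immediate merely selects which input supplies lane i; a scalar model checking the v2f64 case (not SelectionDAG code):

#include <cassert>
// Model of v2f64 BLENDI: mask bit i picks B's lane i, else A's.
// blend(V0, V1, 0b10) == blend(V1, V0, 0b01), the commuted form
// produced by the combine above.
struct V2 { double E0, E1; };
static V2 blend(V2 A, V2 B, unsigned Mask) {
  return { (Mask & 1) ? B.E0 : A.E0, (Mask & 2) ? B.E1 : A.E1 };
}
int main() {
  V2 V0{1.0, 2.0}, V1{3.0, 4.0};
  V2 X = blend(V0, V1, 2), Y = blend(V1, V0, 1);
  assert(X.E0 == Y.E0 && X.E1 == Y.E1);
  return 0;
}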
@@ -23573,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
/// the operands which explicitly discard the lanes which are unused by this
/// operation to try to flow through the rest of the combiner the fact that
/// they're unused.
static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
@@ -23617,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
return SDValue();
// Only specific types are legal at this point, assert so we notice if and
// when these change.
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
VT == MVT::v4f64) &&
"Unknown vector type encountered!");
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
}
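The ADDSUB node built here subtracts in even lanes and adds in odd lanes, which is what the {0,9,2,11,...} blends of FSUB/FADD tested above select; a scalar model (illustrative only):

// Scalar model of X86ISD::ADDSUB on a 4-wide vector: even lanes
// compute L[i] - R[i], odd lanes compute L[i] + R[i].
static void addsub4(const float L[4], const float R[4], float Out[4]) {
  for (int i = 0; i < 4; ++i)
    Out[i] = (i & 1) ? L[i] + R[i] : L[i] - R[i];
}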
@@ -23642,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
@@ -27310,7 +27419,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::UDIVREM &&
N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
(VT == MVT::i32 || VT == MVT::i64)) {
VT == MVT::i32) {
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
N0.getOperand(0), N0.getOperand(1));
@@ -27382,32 +27491,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Gather and Scatter instructions use k-registers for masks. The type of
@@ -27840,6 +27923,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
@@ -27851,6 +27935,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGNR:
case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -27865,7 +27950,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
case ISD::MGATHER:
case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
}
@@ -27902,6 +27986,18 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
/// we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
bool X86TargetLowering::hasCopyImplyingStackAdjustment(
MachineFunction *MF) const {
const MachineRegisterInfo &MRI = MF->getRegInfo();
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.


@@ -697,6 +697,10 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
/// Return true if the MachineFunction contains a COPY which would imply
/// HasOpaqueSPAdjustment.
bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;


@@ -2366,6 +2366,7 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
RegisterClass KRCSrc, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,

Some files were not shown because too many files have changed in this diff.