Merge llvm, clang, lld, lldb, compiler-rt and libc++ release_70 branch

r339355, resolve conflicts, and bump version numbers.
svn path=/projects/clang700-import/; revision=337645
2018-08-11 16:40:03 +00:00 · 2018-08-11 16:40:03 +00:00 · 3beb5372da · 2020-12-20 02:59:44 +00:00
commit 3beb5372da
parent 8f09c33a23 46faa67da1 2a0cb7949b 4504f4f22a 786d82c295 e984e73e53 ac3a3c1340
26 changed files with 438 additions and 271 deletions
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@ -1863,6 +1863,40 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                      MaxRecurse))
      return V;

+  // Assuming the effective width of Y is not larger than A, i.e. all bits
+  // from X and Y are disjoint in (X << A) | Y,
+  // if the mask of this AND op covers all bits of X or Y, while it covers
+  // no bits from the other, we can bypass this AND op. E.g.,
+  // ((X << A) | Y) & Mask -> Y,
+  //     if Mask = ((1 << effective_width_of(Y)) - 1)
+  // ((X << A) | Y) & Mask -> X << A,
+  //     if Mask = ((1 << effective_width_of(X)) - 1) << A
+  // SimplifyDemandedBits in InstCombine can optimize the general case.
+  // This pattern aims to help other passes for a common case.
+  Value *Y, *XShifted;
+  if (match(Op1, m_APInt(Mask)) &&
+      match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+                                     m_Value(XShifted)),
+                        m_Value(Y)))) {
+    const unsigned ShftCnt = ShAmt->getZExtValue();
+    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+    const unsigned Width = Op0->getType()->getScalarSizeInBits();
+    const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+    if (EffWidthY <= ShftCnt) {
+      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+                                                Q.DT);
+      const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+      const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+      const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
+      // If the mask is extracting all bits from X or Y as is, we can skip
+      // this AND op.
+      if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+        return Y;
+      if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+        return XShifted;
+    }
+  }
+
  return nullptr;
 }

--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@ -2817,10 +2817,13 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
    default:
      break;
    case Intrinsic::maxnum:
-      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
-                                             Depth + 1) ||
-             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
-                                             Depth + 1);
+      return (isKnownNeverNaN(I->getOperand(0)) &&
+              cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
+                                              SignBitOnly, Depth + 1)) ||
+             (isKnownNeverNaN(I->getOperand(1)) &&
+              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
+                                              SignBitOnly, Depth + 1));
+
    case Intrinsic::minnum:
      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                             Depth + 1) &&
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -1489,24 +1489,20 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {

  // Get the signbit at the right position for MagAsInt.
  int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
-  if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
-    if (ShiftAmount > 0) {
-      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
-      SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
-    } else if (ShiftAmount < 0) {
-      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
-      SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
-    }
-    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
-  } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+  EVT ShiftVT = IntVT;
+  if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
    SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
-    if (ShiftAmount > 0) {
-      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
-      SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
-    } else if (ShiftAmount < 0) {
-      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
-      SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
-    }
+    ShiftVT = MagVT;
+  }
+  if (ShiftAmount > 0) {
+    SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+    SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+  } else if (ShiftAmount < 0) {
+    SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+    SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+  }
+  if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
  }

  // Store the part with the modified sign and convert back to float.
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -3641,26 +3641,43 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
 }

 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
-  assert(OpNo == 3 && "Can widen only data operand of mstore");
+  assert((OpNo == 2 || OpNo == 3) &&
+         "Can widen only data or mask operand of mstore");
  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  EVT MaskVT = Mask.getValueType();
  SDValue StVal = MST->getValue();
-  // Widen the value
-  SDValue WideVal = GetWidenedVector(StVal);
  SDLoc dl(N);

-  // The mask should be widened as well.
-  EVT WideVT = WideVal.getValueType();
-  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                    MaskVT.getVectorElementType(),
-                                    WideVT.getVectorNumElements());
-  Mask = ModifyToType(Mask, WideMaskVT, true);
+  if (OpNo == 3) {
+    // Widen the value
+    StVal = GetWidenedVector(StVal);
+
+    // The mask should be widened as well.
+    EVT WideVT = StVal.getValueType();
+    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                      MaskVT.getVectorElementType(),
+                                      WideVT.getVectorNumElements());
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+  } else {
+    EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+
+    EVT ValueVT = StVal.getValueType();
+    if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
+      StVal = GetWidenedVector(StVal);
+    else {
+      EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+                                    ValueVT.getVectorElementType(),
+                                    WideMaskVT.getVectorNumElements());
+      StVal = ModifyToType(StVal, WideVT);
+    }
+  }

  assert(Mask.getValueType().getVectorNumElements() ==
-         WideVal.getValueType().getVectorNumElements() &&
+         StVal.getValueType().getVectorNumElements() &&
         "Mask and data vectors should have the same number of elements");
-  return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+  return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
                            Mask, MST->getMemoryVT(), MST->getMemOperand(),
                            false, MST->isCompressingStore());
 }
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@ -481,34 +481,6 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
  return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
 }

-// Return a set of section flags based on the section name that can then
-// be augmented later, otherwise return 0 if we don't have any reasonable
-// defaults.
-static unsigned defaultSectionFlags(StringRef SectionName) {
-
-  if (hasPrefix(SectionName, ".rodata.cst"))
-    return ELF::SHF_ALLOC | ELF::SHF_MERGE;
-
-  if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
-    return ELF::SHF_ALLOC;
-
-  if (SectionName == ".fini" || SectionName == ".init" ||
-      hasPrefix(SectionName, ".text."))
-    return ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
-
-  if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
-      hasPrefix(SectionName, ".bss.") ||
-      hasPrefix(SectionName, ".init_array.") ||
-      hasPrefix(SectionName, ".fini_array.") ||
-      hasPrefix(SectionName, ".preinit_array."))
-    return ELF::SHF_ALLOC | ELF::SHF_WRITE;
-
-  if (hasPrefix(SectionName, ".tdata.") || hasPrefix(SectionName, ".tbss."))
-    return ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
-
-  return 0;
-}
-
 bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
  StringRef SectionName;

@ -518,13 +490,27 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
  StringRef TypeName;
  int64_t Size = 0;
  StringRef GroupName;
+  unsigned Flags = 0;
  const MCExpr *Subsection = nullptr;
  bool UseLastGroup = false;
  MCSymbolELF *Associated = nullptr;
  int64_t UniqueID = ~0;

-  // Set the default section flags first in case no others are given.
-  unsigned Flags = defaultSectionFlags(SectionName);
+  // Set the defaults first.
+  if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
+    Flags |= ELF::SHF_ALLOC;
+  else if (SectionName == ".fini" || SectionName == ".init" ||
+           hasPrefix(SectionName, ".text."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+  else if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
+           hasPrefix(SectionName, ".bss.") ||
+           hasPrefix(SectionName, ".init_array.") ||
+           hasPrefix(SectionName, ".fini_array.") ||
+           hasPrefix(SectionName, ".preinit_array."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE;
+  else if (hasPrefix(SectionName, ".tdata.") ||
+           hasPrefix(SectionName, ".tbss."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;

  if (getLexer().is(AsmToken::Comma)) {
    Lex();
@ -552,12 +538,6 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {

    if (extraFlags == -1U)
      return TokError("unknown flag");
-
-    // If we found additional section flags on a known section then give a
-    // warning.
-    if (Flags && Flags != extraFlags)
-      Warning(loc, "setting incorrect section attributes for " + SectionName);
-
    Flags |= extraFlags;

    bool Mergeable = Flags & ELF::SHF_MERGE;
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@ -267,15 +267,6 @@ def FeatureD16PreservesUnusedBits : SubtargetFeature<
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//

-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
-  "FP32Denormals",
-  "true",
-  "Enable single precision denormal handling"
->;
-
 // Denormal handling for fp64 and fp16 is controlled by the same
 // config register when fp16 supported.
 // TODO: Do we need a separate f16 setting when not legal?
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@ -19,6 +19,15 @@ def FeatureFMA : SubtargetFeature<"fmaf",
  "Enable single precision FMA (not as fast as mul+add, but fused)"
 >;

+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+  "FP32Denormals",
+  "true",
+  "Enable single precision denormal handling"
+>;
+
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                      AMDGPUASI.CONSTANT_BUFFER_0);
+                                      AMDGPUASI.PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));
@ -1457,33 +1457,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
      return scalarizeVectorLoad(LoadNode, DAG);
  }

+  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
-    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
-        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
-      SDValue Slots[4];
-      for (unsigned i = 0; i < 4; i++) {
-        // We want Const position encoded with the following formula :
-        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
-        // const_index is Ptr computed by llvm using an alignment of 16.
-        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
-        // then div by 4 at the ISel step
-        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
-        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
-      }
-      EVT NewVT = MVT::v4i32;
-      unsigned NumElements = 4;
-      if (VT.isVector()) {
-        NewVT = VT;
-        NumElements = VT.getVectorNumElements();
-      }
-      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
+      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
@ -1622,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                          AMDGPUASI.CONSTANT_BUFFER_0);
+                                          AMDGPUASI.PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
@ -1646,17 +1630,17 @@ SDValue R600TargetLowering::LowerFormalArguments(

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
+    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
-        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                                        MachineMemOperand::MODereferenceable |
                                        MachineMemOperand::MOInvariant);

-    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
@ -1804,6 +1788,52 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
  return BuildVector;
 }

+SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(LoadNode);
+  EVT VT = LoadNode->getValueType(0);
+  SDValue Chain = LoadNode->getChain();
+  SDValue Ptr = LoadNode->getBasePtr();
+  assert (isa<ConstantSDNode>(Ptr));
+
+  //TODO: Support smaller loads
+  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
+    return SDValue();
+
+  if (LoadNode->getAlignment() < 4)
+    return SDValue();
+
+  int ConstantBlock = ConstantAddressBlock(Block);
+
+  SDValue Slots[4];
+  for (unsigned i = 0; i < 4; i++) {
+    // We want Const position encoded with the following formula :
+    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+    // const_index is Ptr computed by llvm using an alignment of 16.
+    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+    // then div by 4 at the ISel step
+    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+  }
+  EVT NewVT = MVT::v4i32;
+  unsigned NumElements = 4;
+  if (VT.isVector()) {
+    NewVT = VT;
+    NumElements = VT.getVectorNumElements();
+  }
+  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+  if (!VT.isVector()) {
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+                         DAG.getConstant(0, DL, MVT::i32));
+  }
+  SDValue MergedValues[2] = {
+    Result,
+    Chain
+  };
+  return DAG.getMergeValues(MergedValues, DL);
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG Optimizations
 //===----------------------------------------------------------------------===//
@ -2022,6 +2052,16 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }
+
+  case ISD::LOAD: {
+    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+    SDValue Ptr = LoadNode->getBasePtr();
+    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
+         isa<ConstantSDNode>(Ptr))
+      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
+    break;
+  }
+
  default: break;
  }

--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@ -98,9 +98,11 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
  bool isHWTrueValue(SDValue Op) const;
  bool isHWFalseValue(SDValue Op) const;

- bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
-                  SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
-                  SelectionDAG &DAG) const;
+  bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
+                   SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
+                   SelectionDAG &DAG) const;
+  SDValue constBufferLoad(LoadSDNode *LoadNode, int Block,
+                          SelectionDAG &DAG) const;

  SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
 };
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@ -461,17 +461,6 @@ def : GCNPat <
  (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
 >;

-def : GCNPat<
-  (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
-  (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
->;
-
-def : GCNPat<
-  (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
-   (REG_SEQUENCE VReg_64,
-     (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
-     (V_MOV_B32_e32 (i32 0)), sub1)
->;
 }

 defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@ -16,6 +16,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@ -81,10 +82,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {

  switch (N->getOpcode()) {
  case ISD::LOAD:
+  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
+  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
@ -834,17 +837,27 @@ static Optional<unsigned> pickOpcodeForVT(

 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
-  LoadSDNode *LD = cast<LoadSDNode>(N);
+  MemSDNode *LD = cast<MemSDNode>(N);
+  assert(LD->readMem() && "Expected load");
+  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
-  if (LD->isIndexed())
+  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

+  AtomicOrdering Ordering = LD->getOrdering();
+  // In order to lower atomic loads with stronger guarantees we would need to
+  // use load.acquire or insert fences. However these features were only added
+  // with PTX ISA 6.0 / sm_70.
+  // TODO: Check if we can actually use the new instructions and implement them.
+  if (isStrongerThanMonotonic(Ordering))
+    return false;
+
  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
@ -855,8 +868,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
-  // - .volatile is only availalble for .global and .shared
-  bool isVolatile = LD->isVolatile();
+  // - .volatile is only available for .global and .shared
+  // - .volatile has the same memory synchronization semantics as .relaxed.sys
+  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@ -882,7 +896,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
    fromTypeWidth = 32;
  }

-  if ((LD->getExtensionType() == ISD::SEXTLOAD))
+  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    // f16 uses .b16 as its storage type.
@ -1691,25 +1705,38 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
-  StoreSDNode *ST = cast<StoreSDNode>(N);
+  MemSDNode *ST = cast<MemSDNode>(N);
+  assert(ST->writeMem() && "Expected store");
+  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
+  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
+  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
-  if (ST->isIndexed())
+  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

+  AtomicOrdering Ordering = ST->getOrdering();
+  // In order to lower atomic loads with stronger guarantees we would need to
+  // use store.release or insert fences. However these features were only added
+  // with PTX ISA 6.0 / sm_70.
+  // TODO: Check if we can actually use the new instructions and implement them.
+  if (isStrongerThanMonotonic(Ordering))
+    return false;
+
  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
-  // - .volatile is only availalble for .global and .shared
-  bool isVolatile = ST->isVolatile();
+  // - .volatile is only available for .global and .shared
+  // - .volatile has the same memory synchronization semantics as .relaxed.sys
+  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@ -1739,41 +1766,53 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
-  SDValue Chain = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  SDValue N2 = N->getOperand(2);
+  SDValue Chain = ST->getChain();
+  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
+  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  Optional<unsigned> Opcode;
-  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
+  MVT::SimpleValueType SourceVT =
+      Value.getNode()->getSimpleValueType(0).SimpleTy;

-  if (SelectDirectAddr(N2, Addr)) {
+  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
-                      Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Addr,
+                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
-  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
-                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (PointerSize == 64
+                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
+                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
-                      Offset, Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Base,
+                     Offset,
+                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
-  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
-                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+  } else if (PointerSize == 64
+                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
+                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode = pickOpcodeForVT(
          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
@ -1787,10 +1826,15 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
    if (!Opcode)
      return false;

-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
-                      Offset, Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Base,
+                     Offset,
+                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  } else {
    if (PointerSize == 64)
@ -1806,10 +1850,14 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
-                      Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     BasePtr,
+                     Chain};
    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
  }

--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@ -47,21 +47,17 @@ STATISTIC(ChecksUnable, "Bounds checks unable to add");

 using BuilderTy = IRBuilder<TargetFolder>;

-/// Adds run-time bounds checks to memory accessing instructions.
+/// Gets the conditions under which memory accessing instructions will overflow.
 ///
 /// \p Ptr is the pointer that will be read/written, and \p InstVal is either
 /// the result from the load or the value being stored. It is used to determine
 /// the size of memory block that is touched.
 ///
-/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
-///
-/// Returns true if any change was made to the IR, false otherwise.
-template <typename GetTrapBBT>
-static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
-                                const DataLayout &DL, TargetLibraryInfo &TLI,
-                                ObjectSizeOffsetEvaluator &ObjSizeEval,
-                                BuilderTy &IRB, GetTrapBBT GetTrapBB,
-                                ScalarEvolution &SE) {
+/// Returns the condition under which the access will overflow.
+static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
+                                 const DataLayout &DL, TargetLibraryInfo &TLI,
+                                 ObjectSizeOffsetEvaluator &ObjSizeEval,
+                                 BuilderTy &IRB, ScalarEvolution &SE) {
  uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
  LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
                    << " bytes\n");
@ -70,7 +66,7 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,

  if (!ObjSizeEval.bothKnown(SizeOffset)) {
    ++ChecksUnable;
-    return false;
+    return nullptr;
  }

  Value *Size   = SizeOffset.first;
@ -107,13 +103,23 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
    Or = IRB.CreateOr(Cmp1, Or);
  }

+  return Or;
+}
+
+/// Adds run-time bounds checks to memory accessing instructions.
+///
+/// \p Or is the condition that should guard the trap.
+///
+/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
+template <typename GetTrapBBT>
+static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
  // check if the comparison is always false
  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
  if (C) {
    ++ChecksSkipped;
    // If non-zero, nothing to do.
    if (!C->getZExtValue())
-      return true;
+      return;
  }
  ++ChecksAdded;

@ -127,12 +133,11 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
    // FIXME: We should really handle this differently to bypass the splitting
    // the block.
    BranchInst::Create(GetTrapBB(IRB), OldBB);
-    return true;
+    return;
  }

  // Create the conditional branch.
  BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
-  return true;
 }

 static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
@ -143,11 +148,25 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,

  // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
  // touching instructions
-  std::vector<Instruction *> WorkList;
+  SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo;
  for (Instruction &I : instructions(F)) {
-    if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
-        isa<AtomicRMWInst>(I))
-        WorkList.push_back(&I);
+    Value *Or = nullptr;
+    BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+      Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
+                              ObjSizeEval, IRB, SE);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+      Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
+                              DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+      Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
+                              DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+      Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL,
+                              TLI, ObjSizeEval, IRB, SE);
+    }
+    if (Or)
+      TrapInfo.push_back(std::make_pair(&I, Or));
  }

  // Create a trapping basic block on demand using a callback. Depending on
@ -176,29 +195,14 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
    return TrapBB;
  };

-  bool MadeChange = false;
-  for (Instruction *Inst : WorkList) {
+  // Add the checks.
+  for (const auto &Entry : TrapInfo) {
+    Instruction *Inst = Entry.first;
    BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-      MadeChange |= instrumentMemAccess(LI->getPointerOperand(), LI, DL, TLI,
-                                        ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(SI->getPointerOperand(), SI->getValueOperand(),
-                              DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(AI->getPointerOperand(), AI->getCompareOperand(),
-                              DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(AI->getPointerOperand(), AI->getValOperand(), DL,
-                              TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else {
-      llvm_unreachable("unknown Instruction type");
-    }
+    insertBoundsCheck(Entry.second, IRB, GetTrapBB);
  }
-  return MadeChange;
+
+  return !TrapInfo.empty();
 }

 PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) {
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
@ -1213,9 +1213,13 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
  auto fields = builder.beginStruct();

  bool IsOpenCL = CGM.getLangOpts().OpenCL;
+  bool IsWindows = CGM.getTarget().getTriple().isOSWindows();
  if (!IsOpenCL) {
    // isa
-    fields.add(CGM.getNSConcreteGlobalBlock());
+    if (IsWindows)
+      fields.addNullPointer(CGM.Int8PtrPtrTy);
+    else
+      fields.add(CGM.getNSConcreteGlobalBlock());

    // __flags
    BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@ -1250,7 +1254,27 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,

  llvm::Constant *literal = fields.finishAndCreateGlobal(
      "__block_literal_global", blockInfo.BlockAlign,
-      /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+      /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+
+  // Windows does not allow globals to be initialised to point to globals in
+  // different DLLs.  Any such variables must run code to initialise them.
+  if (IsWindows) {
+    auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy,
+          {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init",
+        &CGM.getModule());
+    llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry",
+          Init));
+    b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(),
+        b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity());
+    b.CreateRetVoid();
+    // We can't use the normal LLVM global initialisation array, because we
+    // need to specify that this runs early in library initialisation.
+    auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
+        /*isConstant*/true, llvm::GlobalValue::InternalLinkage,
+        Init, ".block_isa_init_ptr");
+    InitVar->setSection(".CRT$XCLa");
+    CGM.addUsedGlobal(InitVar);
+  }

  // Return a constant of the appropriately-casted type.
  llvm::Type *RequiredType =
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
@ -3812,40 +3812,10 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
  // is.  This allows code compiled with non-fragile ivars to work correctly
  // when linked against code which isn't (most of the time).
  llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
-  if (!IvarOffsetPointer) {
-    // This will cause a run-time crash if we accidentally use it.  A value of
-    // 0 would seem more sensible, but will silently overwrite the isa pointer
-    // causing a great deal of confusion.
-    uint64_t Offset = -1;
-    // We can't call ComputeIvarBaseOffset() here if we have the
-    // implementation, because it will create an invalid ASTRecordLayout object
-    // that we are then stuck with forever, so we only initialize the ivar
-    // offset variable with a guess if we only have the interface.  The
-    // initializer will be reset later anyway, when we are generating the class
-    // description.
-    if (!CGM.getContext().getObjCImplementation(
-              const_cast<ObjCInterfaceDecl *>(ID)))
-      Offset = ComputeIvarBaseOffset(CGM, ID, Ivar);
-
-    llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset,
-                             /*isSigned*/true);
-    // Don't emit the guess in non-PIC code because the linker will not be able
-    // to replace it with the real version for a library.  In non-PIC code you
-    // must compile with the fragile ABI if you want to use ivars from a
-    // GCC-compiled class.
-    if (CGM.getLangOpts().PICLevel) {
-      llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule,
-            Int32Ty, false,
-            llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess");
-      IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
-            IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage,
-            IvarOffsetGV, Name);
-    } else {
-      IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
-              llvm::Type::getInt32PtrTy(VMContext), false,
-              llvm::GlobalValue::ExternalLinkage, nullptr, Name);
-    }
-  }
+  if (!IvarOffsetPointer)
+    IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
+            llvm::Type::getInt32PtrTy(VMContext), false,
+            llvm::GlobalValue::ExternalLinkage, nullptr, Name);
  return IvarOffsetPointer;
 }

--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
@ -127,6 +127,7 @@ namespace clang {
                                CodeGenOpts, C, CoverageInfo)),
          LinkModules(std::move(LinkModules)) {
      FrontendTimesIsEnabled = TimePasses;
+      llvm::TimePassesIsEnabled = TimePasses;
    }
    llvm::Module *getModule() const { return Gen->GetModule(); }
    std::unique_ptr<llvm::Module> takeModule() {
--- a/contrib/llvm/tools/clang/lib/Headers/unwind.h
+++ b/contrib/llvm/tools/clang/lib/Headers/unwind.h
@ -154,8 +154,12 @@ struct _Unwind_Control_Block {
 struct _Unwind_Exception {
  _Unwind_Exception_Class exception_class;
  _Unwind_Exception_Cleanup_Fn exception_cleanup;
+#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
+  _Unwind_Word private_[6];
+#else
  _Unwind_Word private_1;
  _Unwind_Word private_2;
+#endif
  /* The Itanium ABI requires that _Unwind_Exception objects are "double-word
   * aligned".  GCC has interpreted this to mean "use the maximum useful
   * alignment for the target"; so do we. */
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
@ -113,9 +113,15 @@ ParsedType Sema::getConstructorName(IdentifierInfo &II,
      break;
    }
  }
-  if (!InjectedClassName && CurClass->isInvalidDecl())
+  if (!InjectedClassName) {
+    if (!CurClass->isInvalidDecl()) {
+      // FIXME: RequireCompleteDeclContext doesn't check dependent contexts
+      // properly. Work around it here for now.
+      Diag(SS.getLastQualifierNameLoc(),
+           diag::err_incomplete_nested_name_spec) << CurClass << SS.getRange();
+    }
    return ParsedType();
-  assert(InjectedClassName && "couldn't find injected class name");
+  }

  QualType T = Context.getTypeDeclType(InjectedClassName);
  DiagnoseUseOfDecl(InjectedClassName, NameLoc);
--- a/contrib/llvm/tools/lld/COFF/InputFiles.cpp
+++ b/contrib/llvm/tools/lld/COFF/InputFiles.cpp
@ -205,7 +205,13 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,

 void ObjFile::readAssociativeDefinition(
    COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
-  SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())];
+  readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
+}
+
+void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
+                                        const coff_aux_section_definition *Def,
+                                        uint32_t ParentSection) {
+  SectionChunk *Parent = SparseChunks[ParentSection];

  // If the parent is pending, it probably means that its section definition
  // appears after us in the symbol table. Leave the associated section as
@ -225,6 +231,35 @@ void ObjFile::readAssociativeDefinition(
  }
 }

+void ObjFile::recordPrevailingSymbolForMingw(
+    COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+  // For comdat symbols in executable sections, where this is the copy
+  // of the section chunk we actually include instead of discarding it,
+  // add the symbol to a map to allow using it for implicitly
+  // associating .[px]data$<func> sections to it.
+  int32_t SectionNumber = Sym.getSectionNumber();
+  SectionChunk *SC = SparseChunks[SectionNumber];
+  if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
+    StringRef Name;
+    COFFObj->getSymbolName(Sym, Name);
+    PrevailingSectionMap[Name] = SectionNumber;
+  }
+}
+
+void ObjFile::maybeAssociateSEHForMingw(
+    COFFSymbolRef Sym, const coff_aux_section_definition *Def,
+    const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+  StringRef Name;
+  COFFObj->getSymbolName(Sym, Name);
+  if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
+    // For MinGW, treat .[px]data$<func> as implicitly associative to
+    // the symbol <func>.
+    auto ParentSym = PrevailingSectionMap.find(Name);
+    if (ParentSym != PrevailingSectionMap.end())
+      readAssociativeDefinition(Sym, Def, ParentSym->second);
+  }
+}
+
 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
  SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
  if (Sym.isExternal()) {
@ -248,19 +283,24 @@ void ObjFile::initializeSymbols() {
  std::vector<uint32_t> PendingIndexes;
  PendingIndexes.reserve(NumSymbols);

+  DenseMap<StringRef, uint32_t> PrevailingSectionMap;
  std::vector<const coff_aux_section_definition *> ComdatDefs(
      COFFObj->getNumberOfSections() + 1);

  for (uint32_t I = 0; I < NumSymbols; ++I) {
    COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
+    bool PrevailingComdat;
    if (COFFSym.isUndefined()) {
      Symbols[I] = createUndefined(COFFSym);
    } else if (COFFSym.isWeakExternal()) {
      Symbols[I] = createUndefined(COFFSym);
      uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
      WeakAliases.emplace_back(Symbols[I], TagIndex);
-    } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) {
+    } else if (Optional<Symbol *> OptSym =
+                   createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
      Symbols[I] = *OptSym;
+      if (Config->MinGW && PrevailingComdat)
+        recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
    } else {
      // createDefined() returns None if a symbol belongs to a section that
      // was pending at the point when the symbol was read. This can happen in
@ -278,9 +318,12 @@ void ObjFile::initializeSymbols() {

  for (uint32_t I : PendingIndexes) {
    COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
-    if (auto *Def = Sym.getSectionDefinition())
+    if (auto *Def = Sym.getSectionDefinition()) {
      if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
        readAssociativeDefinition(Sym, Def);
+      else if (Config->MinGW)
+        maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
+    }
    if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
      StringRef Name;
      COFFObj->getSymbolName(Sym, Name);
@ -306,7 +349,9 @@ Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {

 Optional<Symbol *> ObjFile::createDefined(
    COFFSymbolRef Sym,
-    std::vector<const coff_aux_section_definition *> &ComdatDefs) {
+    std::vector<const coff_aux_section_definition *> &ComdatDefs,
+    bool &Prevailing) {
+  Prevailing = false;
  auto GetName = [&]() {
    StringRef S;
    COFFObj->getSymbolName(Sym, S);
@ -352,7 +397,6 @@ Optional<Symbol *> ObjFile::createDefined(
  if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
    ComdatDefs[SectionNumber] = nullptr;
    Symbol *Leader;
-    bool Prevailing;
    if (Sym.isExternal()) {
      std::tie(Leader, Prevailing) =
          Symtab->addComdat(this, GetName(), Sym.getGeneric());
--- a/contrib/llvm/tools/lld/COFF/InputFiles.h
+++ b/contrib/llvm/tools/lld/COFF/InputFiles.h
@ -13,6 +13,7 @@
 #include "Config.h"
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Object/Archive.h"
@ -157,10 +158,24 @@ class ObjFile : public InputFile {
      COFFSymbolRef COFFSym,
      const llvm::object::coff_aux_section_definition *Def);

+  void readAssociativeDefinition(
+      COFFSymbolRef COFFSym,
+      const llvm::object::coff_aux_section_definition *Def,
+      uint32_t ParentSection);
+
+  void recordPrevailingSymbolForMingw(
+      COFFSymbolRef COFFSym,
+      llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+
+  void maybeAssociateSEHForMingw(
+      COFFSymbolRef Sym, const llvm::object::coff_aux_section_definition *Def,
+      const llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+
  llvm::Optional<Symbol *>
  createDefined(COFFSymbolRef Sym,
                std::vector<const llvm::object::coff_aux_section_definition *>
-                    &ComdatDefs);
+                    &ComdatDefs,
+                bool &PrevailingComdat);
  Symbol *createRegular(COFFSymbolRef Sym);
  Symbol *createUndefined(COFFSymbolRef Sym);

--- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@ -116,7 +116,8 @@ void LinkerScript::expandMemoryRegions(uint64_t Size) {
  if (Ctx->MemRegion)
    expandMemoryRegion(Ctx->MemRegion, Size, Ctx->MemRegion->Name,
                       Ctx->OutSec->Name);
-  if (Ctx->LMARegion)
+  // Only expand the LMARegion if it is different from MemRegion.
+  if (Ctx->LMARegion && Ctx->MemRegion != Ctx->LMARegion)
    expandMemoryRegion(Ctx->LMARegion, Size, Ctx->LMARegion->Name,
                       Ctx->OutSec->Name);
 }
@ -750,6 +751,13 @@ MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *Sec) {
  return nullptr;
 }

+static OutputSection *findFirstSection(PhdrEntry *Load) {
+  for (OutputSection *Sec : OutputSections)
+    if (Sec->PtLoad == Load)
+      return Sec;
+  return nullptr;
+}
+
 // This function assigns offsets to input sections and an output section
 // for a single sections command (e.g. ".text { *(.text); }").
 void LinkerScript::assignOffsets(OutputSection *Sec) {
@ -775,8 +783,11 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
  // will set the LMA such that the difference between VMA and LMA for the
  // section is the same as the preceding output section in the same region
  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+  // This, however, should only be done by the first "non-header" section
+  // in the segment.
  if (PhdrEntry *L = Ctx->OutSec->PtLoad)
-    L->LMAOffset = Ctx->LMAOffset;
+    if (Sec == findFirstSection(L))
+      L->LMAOffset = Ctx->LMAOffset;

  // We can call this method multiple times during the creation of
  // thunks and want to start over calculation each time.
@ -953,13 +964,6 @@ void LinkerScript::adjustSectionsAfterSorting() {
  }
 }

-static OutputSection *findFirstSection(PhdrEntry *Load) {
-  for (OutputSection *Sec : OutputSections)
-    if (Sec->PtLoad == Load)
-      return Sec;
-  return nullptr;
-}
-
 static uint64_t computeBase(uint64_t Min, bool AllocateHeaders) {
  // If there is no SECTIONS or if the linkerscript is explicit about program
  // headers, do our best to allocate them.
--- a/contrib/llvm/tools/lld/ELF/Writer.cpp
+++ b/contrib/llvm/tools/lld/ELF/Writer.cpp
@ -1815,12 +1815,14 @@ template <class ELFT> std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs() {
    // Segments are contiguous memory regions that has the same attributes
    // (e.g. executable or writable). There is one phdr for each segment.
    // Therefore, we need to create a new phdr when the next section has
-    // different flags or is loaded at a discontiguous address using AT linker
-    // script command. At the same time, we don't want to create a separate
-    // load segment for the headers, even if the first output section has
-    // an AT attribute.
+    // different flags or is loaded at a discontiguous address or memory
+    // region using AT or AT> linker script command, respectively. At the same
+    // time, we don't want to create a separate load segment for the headers,
+    // even if the first output section has an AT or AT> attribute.
    uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
-    if ((Sec->LMAExpr && Load->LastSec != Out::ProgramHeaders) ||
+    if (((Sec->LMAExpr ||
+          (Sec->LMARegion && (Sec->LMARegion != Load->FirstSec->LMARegion))) &&
+         Load->LastSec != Out::ProgramHeaders) ||
        Sec->MemRegion != Load->FirstSec->MemRegion || Flags != NewFlags) {

      Load = AddHdr(PT_LOAD, NewFlags);
--- a/contrib/llvm/tools/lldb/include/lldb/Utility/VMRange.h
+++ b/contrib/llvm/tools/lldb/include/lldb/Utility/VMRange.h
@ -87,24 +87,6 @@ class VMRange {
  void Dump(Stream *s, lldb::addr_t base_addr = 0,
            uint32_t addr_width = 8) const;

-  class ValueInRangeUnaryPredicate {
-  public:
-    ValueInRangeUnaryPredicate(lldb::addr_t value) : _value(value) {}
-    bool operator()(const VMRange &range) const {
-      return range.Contains(_value);
-    }
-    lldb::addr_t _value;
-  };
-
-  class RangeInRangeUnaryPredicate {
-  public:
-    RangeInRangeUnaryPredicate(VMRange range) : _range(range) {}
-    bool operator()(const VMRange &range) const {
-      return range.Contains(_range);
-    }
-    const VMRange &_range;
-  };
-
  static bool ContainsValue(const VMRange::collection &coll,
                            lldb::addr_t value);

--- a/contrib/llvm/tools/lldb/source/Utility/VMRange.cpp
+++ b/contrib/llvm/tools/lldb/source/Utility/VMRange.cpp
@ -24,14 +24,16 @@ using namespace lldb_private;

 bool VMRange::ContainsValue(const VMRange::collection &coll,
                            lldb::addr_t value) {
-  ValueInRangeUnaryPredicate in_range_predicate(value);
-  return llvm::find_if(coll, in_range_predicate) != coll.end();
+  return llvm::find_if(coll, [&](const VMRange &r) {
+           return r.Contains(value);
+         }) != coll.end();
 }

 bool VMRange::ContainsRange(const VMRange::collection &coll,
                            const VMRange &range) {
-  RangeInRangeUnaryPredicate in_range_predicate(range);
-  return llvm::find_if(coll, in_range_predicate) != coll.end();
+  return llvm::find_if(coll, [&](const VMRange &r) {
+           return r.Contains(range);
+         }) != coll.end();
 }

 void VMRange::Dump(Stream *s, lldb::addr_t offset, uint32_t addr_width) const {
--- a/lib/clang/freebsd_cc_version.h
+++ b/lib/clang/freebsd_cc_version.h
@ -1,3 +1,3 @@
 /* $FreeBSD$ */

-#define	FREEBSD_CC_VERSION		1200015
+#define	FREEBSD_CC_VERSION		1200016
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@ -8,4 +8,4 @@

 #define	CLANG_VENDOR			"FreeBSD "

-#define	SVN_REVISION			"338892"
+#define	SVN_REVISION			"339355"
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@ -7,4 +7,4 @@

 #define LLD_REPOSITORY_STRING "FreeBSD"
 // <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION_STRING "338892-1200005"
+#define LLD_REVISION_STRING "339355-1200005"