Merge llvm 3.6.0rc2 from ^/vendor/llvm/dist, merge clang 3.6.0rc2 from
^/vendor/clang/dist, resolve conflicts, and cleanup patches.
commit 3de688eb16
@@ -195,6 +195,9 @@ public:
/// then return NULL.
Pass *findAnalysisPass(AnalysisID AID);

/// Retrieve the PassInfo for an analysis.
const PassInfo *findAnalysisPassInfo(AnalysisID AID) const;

/// Find analysis usage information for the pass P.
AnalysisUsage *findAnalysisUsage(Pass *P);

@@ -251,6 +254,12 @@ private:
SmallVector<ImmutablePass *, 16> ImmutablePasses;

DenseMap<Pass *, AnalysisUsage *> AnUsageMap;

/// Collection of PassInfo objects found via analysis IDs and in this top
/// level manager. This is used to memoize queries to the pass registry.
/// FIXME: This is an egregious hack because querying the pass registry is
/// either slow or racy.
mutable DenseMap<AnalysisID, const PassInfo *> AnalysisPassInfos;
};

@@ -61,6 +61,12 @@ public:
/// markers. If not, data region directives will be ignored.
bool hasDataInCodeSupport() const { return HasDataInCodeSupport; }

/// doesSectionRequireSymbols - Check whether the given section requires that
/// all symbols (even temporaries) have symbol table entries.
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}

/// @name Target Fixup Interfaces
/// @{

@@ -11,7 +11,6 @@
#define LLVM_MC_MCASSEMBLER_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"

@@ -882,8 +881,6 @@ private:

iplist<MCSymbolData> Symbols;

DenseSet<const MCSymbol *> LocalsUsedInReloc;

/// The map of sections to their associated assembler backend data.
//
// FIXME: Avoid this indirection?

@@ -983,9 +980,6 @@ private:
MCFragment &F, const MCFixup &Fixup);

public:
void addLocalUsedInReloc(const MCSymbol &Sym);
bool isLocalUsedInReloc(const MCSymbol &Sym) const;

/// Compute the effective fragment size assuming it is laid out at the given
/// \p SectionAddress and \p FragmentOffset.
uint64_t computeFragmentSize(const MCAsmLayout &Layout,

@@ -68,10 +68,12 @@ public:
/// @name API
/// @{

virtual void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
virtual void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) = 0;

/// @}

@@ -95,14 +97,8 @@ class MachObjectWriter : public MCObjectWriter {
/// @name Relocation Data
/// @{

struct RelAndSymbol {
const MCSymbolData *Sym;
MachO::any_relocation_info MRE;
RelAndSymbol(const MCSymbolData *Sym, const MachO::any_relocation_info &MRE)
: Sym(Sym), MRE(MRE) {}
};

llvm::DenseMap<const MCSectionData *, std::vector<RelAndSymbol>> Relocations;
llvm::DenseMap<const MCSectionData*,
std::vector<MachO::any_relocation_info> > Relocations;
llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;

/// @}

@@ -217,15 +213,9 @@ public:
// - Input errors, where something cannot be correctly encoded. 'as' allows
// these through in many cases.

// Add a relocation to be output in the object file. At the time this is
// called, the symbol indexes are not know, so if the relocation refers
// to a symbol it should be passed as \p RelSymbol so that it can be updated
// afterwards. If the relocation doesn't refer to a symbol, nullptr should be
// used.
void addRelocation(const MCSymbolData *RelSymbol, const MCSectionData *SD,
void addRelocation(const MCSectionData *SD,
MachO::any_relocation_info &MRE) {
RelAndSymbol P(RelSymbol, MRE);
Relocations[SD].push_back(P);
Relocations[SD].push_back(MRE);
}

void RecordScatteredRelocation(const MCAssembler &Asm,

@@ -241,7 +231,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue);

void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;

@@ -76,10 +76,12 @@ public:
/// post layout binding. The implementation is responsible for storing
/// information about the relocation so that it can be emitted during
/// WriteObject().
virtual void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
virtual void RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) = 0;
bool &IsPCRel,
uint64_t &FixedValue) = 0;

/// \brief Check whether the difference (A - B) between two symbol
/// references is fully resolved.
@@ -17,6 +17,7 @@

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Target/TargetLibraryInfo.h"

namespace llvm {
class Value;

@@ -53,8 +54,10 @@ private:
Value *optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B);
Value *optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B);

// Str/Stp cpy are similar enough to be handled in the same functions.
Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func);
Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func);

/// \brief Checks whether the call \p CI to a fortified libcall is foldable
/// to the non-fortified version.

@@ -3154,8 +3154,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
if (LHS == RHS)
return getConstant(LHS->getType(), 0);

// X - Y --> X + -Y
return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
// X - Y --> X + -Y.
// X -(nsw || nuw) Y --> X + -Y.
return getAddExpr(LHS, getNegativeSCEV(RHS));
}

/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the

@@ -3461,12 +3462,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
} else if (const SubOperator *OBO =
dyn_cast<SubOperator>(BEValueV)) {
if (OBO->hasNoUnsignedWrap())
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);

// We cannot transfer nuw and nsw flags from subtraction
// operations -- sub nuw X, Y is not the same as add nuw X, -Y
// for instance.
}

const SCEV *StartVal = getSCEV(StartValueV);

@@ -626,10 +626,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {

DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());

// If this type is not derived from any type or the type is a declaration then
// take conservative approach.
if (!BaseType.isValid() || BaseType.isForwardDecl())
return Ty.getSizeInBits();
assert(BaseType.isValid());

// If this is a derived type, go ahead and get the base type, unless it's a
// reference then it's just the size of the field. Pointer types have no need

@@ -1473,7 +1470,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
uint64_t FieldSize = getBaseTypeSize(DD, DT);
uint64_t OffsetInBytes;

if (Size != FieldSize) {
if (FieldSize && Size != FieldSize) {
// Handle bitfield, assume bytes are 8 bits.
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);

@@ -11,11 +11,19 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;

Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,

@@ -43,15 +51,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
// because CodeGen overloads that to mean preserving the MachineBasicBlock
// CFG in addition to the LLVM IR CFG.
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreserved("iv-users");
AU.addPreserved("memdep");
AU.addPreserved("live-values");
AU.addPreserved("domtree");
AU.addPreserved("domfrontier");
AU.addPreserved("loops");
AU.addPreserved("lda");
AU.addPreserved("stack-protector");
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<IVUsers>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<StackProtector>();

FunctionPass::getAnalysisUsage(AU);
}
@@ -563,9 +563,23 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
return nullptr;
}

void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
MDNode *N =
MD ? cast<MDNode>(unwrap<MetadataAsValue>(MD)->getMetadata()) : nullptr;
// MetadataAsValue uses a canonical format which strips the actual MDNode for
// MDNode with just a single constant value, storing just a ConstantAsMetadata
// This undoes this canonicalization, reconstructing the MDNode.
static MDNode *extractMDNode(MetadataAsValue *MAV) {
Metadata *MD = MAV->getMetadata();
assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) &&
"Expected a metadata node or a canonicalized constant");

if (MDNode *N = dyn_cast<MDNode>(MD))
return N;

return MDNode::get(MAV->getContext(), MD);
}

void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) {
MDNode *N = Val ? extractMDNode(unwrap<MetadataAsValue>(Val)) : nullptr;

unwrap<Instruction>(Inst)->setMetadata(KindID, N);
}

@@ -795,7 +809,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
return;
if (!Val)
return;
N->addOperand(cast<MDNode>(unwrap<MetadataAsValue>(Val)->getMetadata()));
N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val)));
}

/*--.. Operations on scalar constants ......................................--*/

@@ -600,8 +600,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// If P is an analysis pass and it is available then do not
// generate the analysis again. Stale analysis info should not be
// available at this point.
const PassInfo *PI =
PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
const PassInfo *PI = findAnalysisPassInfo(P->getPassID());
if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
delete P;
return;

@@ -619,7 +618,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {

Pass *AnalysisPass = findAnalysisPass(*I);
if (!AnalysisPass) {
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
const PassInfo *PI = findAnalysisPassInfo(*I);

if (!PI) {
// Pass P is not in the global PassRegistry

@@ -716,8 +715,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
return *I;

// If Pass not found then check the interfaces implemented by Immutable Pass
const PassInfo *PassInf =
PassRegistry::getPassRegistry()->getPassInfo(PI);
const PassInfo *PassInf = findAnalysisPassInfo(PI);
assert(PassInf && "Expected all immutable passes to be initialized");
const std::vector<const PassInfo*> &ImmPI =
PassInf->getInterfacesImplemented();

@@ -731,6 +729,17 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
return nullptr;
}

const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const {
const PassInfo *&PI = AnalysisPassInfos[AID];
if (!PI)
PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
else
assert(PI == PassRegistry::getPassRegistry()->getPassInfo(AID) &&
"The pass info pointer changed for an analysis ID!");

return PI;
}

// Print passes managed by this top level manager.
void PMTopLevelManager::dumpPasses() const {

@@ -759,8 +768,7 @@ void PMTopLevelManager::dumpArguments() const {
dbgs() << "Pass Arguments: ";
for (SmallVectorImpl<ImmutablePass *>::const_iterator I =
ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
if (const PassInfo *PI =
PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
if (const PassInfo *PI = findAnalysisPassInfo((*I)->getPassID())) {
assert(PI && "Expected all immutable passes to be initialized");
if (!PI->isAnalysisGroup())
dbgs() << " -" << PI->getPassArgument();

@@ -824,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) {

// This pass is the current implementation of all of the interfaces it
// implements as well.
const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
const PassInfo *PInf = TPM->findAnalysisPassInfo(PI);
if (!PInf) return;
const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i)

@@ -957,7 +965,7 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
}

AnalysisID PI = P->getPassID();
if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
if (const PassInfo *PInf = TPM->findAnalysisPassInfo(PI)) {
// Remove the pass itself (if it is not already removed).
AvailableAnalysis.erase(PI);

@@ -1037,7 +1045,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
for (SmallVectorImpl<AnalysisID>::iterator
I = ReqAnalysisNotAvailable.begin(),
E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
const PassInfo *PI = TPM->findAnalysisPassInfo(*I);
Pass *AnalysisPass = PI->createPass();
this->addLowerLevelRequiredPass(P, AnalysisPass);
}

@@ -1142,7 +1150,7 @@ void PMDataManager::dumpPassArguments() const {
PMD->dumpPassArguments();
else
if (const PassInfo *PI =
PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
TPM->findAnalysisPassInfo((*I)->getPassID()))
if (!PI->isAnalysisGroup())
dbgs() << " -" << PI->getPassArgument();
}

@@ -1218,7 +1226,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
for (unsigned i = 0; i != Set.size(); ++i) {
if (i) dbgs() << ',';
const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
const PassInfo *PInf = TPM->findAnalysisPassInfo(Set[i]);
if (!PInf) {
// Some preserved passes, such as AliasAnalysis, may not be initialized by
// all drivers.

@@ -1658,8 +1666,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {

OnTheFlyManagers[P] = FPP;
}
const PassInfo * RequiredPassPI =
PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
const PassInfo *RequiredPassPI =
TPM->findAnalysisPassInfo(RequiredPass->getPassID());

Pass *FoundPass = nullptr;
if (RequiredPassPI && RequiredPassPI->isAnalysis()) {
@@ -219,7 +219,7 @@ class ELFObjectWriter : public MCObjectWriter {
const MCSymbolData *SD, uint64_t C,
unsigned Type) const;

void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;

@@ -789,11 +789,13 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
return nullptr;
}

void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) {
const MCFixup &Fixup,
MCValue Target,
bool &IsPCRel,
uint64_t &FixedValue) {
const MCSectionData *FixupSection = Fragment->getParent();
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();

@@ -27,7 +27,22 @@ bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
// contain.
// Sections holding 2 byte strings require symbols in order to be atomized.
// There is no dedicated section for 4 byte strings.
if (SMO.getType() == MachO::S_CSTRING_LITERALS)
if (SMO.getKind().isMergeable1ByteCString())
return false;

if (SMO.getSegmentName() == "__TEXT" &&
SMO.getSectionName() == "__objc_classname" &&
SMO.getType() == MachO::S_CSTRING_LITERALS)
return false;

if (SMO.getSegmentName() == "__TEXT" &&
SMO.getSectionName() == "__objc_methname" &&
SMO.getType() == MachO::S_CSTRING_LITERALS)
return false;

if (SMO.getSegmentName() == "__TEXT" &&
SMO.getSectionName() == "__objc_methtype" &&
SMO.getType() == MachO::S_CSTRING_LITERALS)
return false;

if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring")

@@ -425,16 +425,6 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
return true;
}

void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) {
assert(Sym.isTemporary());
LocalsUsedInReloc.insert(&Sym);
}

bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const {
assert(Sym.isTemporary());
return LocalsUsedInReloc.count(&Sym);
}

bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())

@@ -444,10 +434,8 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
if (!Symbol.isInSection())
return false;

if (isLocalUsedInReloc(Symbol))
return true;

return false;
// Otherwise, check if the section requires symbols even for temporary labels.
return getBackend().doesSectionRequireSymbols(Symbol.getSection());
}

const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
@@ -448,11 +448,14 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand(
assert(OS.tell() - Start == Size);
}

void MachObjectWriter::RecordRelocation(MCAssembler &Asm,

void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
bool &IsPCRel, uint64_t &FixedValue) {
const MCFixup &Fixup,
MCValue Target,
bool &IsPCRel,
uint64_t &FixedValue) {
TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
Target, FixedValue);
}

@@ -613,22 +616,6 @@ void MachObjectWriter::ComputeSymbolTable(
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);

for (const MCSectionData &SD : Asm) {
std::vector<RelAndSymbol> &Relocs = Relocations[&SD];
for (RelAndSymbol &Rel : Relocs) {
if (!Rel.Sym)
continue;

// Set the Index and the IsExtern bit.
unsigned Index = Rel.Sym->getIndex();
assert(isInt<24>(Index));
if (IsLittleEndian)
Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27);
else
Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
}
}
}

void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,

@@ -675,6 +662,10 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// Mark symbol difference expressions in variables (from .set or = directives)
// as absolute.
markAbsoluteVariableSymbols(Asm, Layout);

// Compute symbol table information and bind symbol indices.
ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
}

bool MachObjectWriter::

@@ -758,10 +749,6 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,

void MachObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// Compute symbol table information and bind symbol indices.
ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);

unsigned NumSections = Asm.size();
const MCAssembler::VersionMinInfoType &VersionInfo =
Layout.getAssembler().getVersionMinInfo();

@@ -852,7 +839,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
std::vector<RelAndSymbol> &Relocs = Relocations[it];
std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
unsigned NumRelocs = Relocs.size();
uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);

@@ -946,10 +933,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
ie = Asm.end(); it != ie; ++it) {
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
std::vector<RelAndSymbol> &Relocs = Relocations[it];
std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
Write32(Relocs[e - i - 1].MRE.r_word0);
Write32(Relocs[e - i - 1].MRE.r_word1);
Write32(Relocs[e - i - 1].r_word0);
Write32(Relocs[e - i - 1].r_word1);
}
}

@@ -175,7 +175,7 @@ public:
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;

void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;

@@ -661,9 +661,13 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
InSet, IsPCRel);
}

void WinCOFFObjectWriter::RecordRelocation(
MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
bool &IsPCRel,
uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");

const MCSymbol &Symbol = Target.getSymA()->getSymbol();
@@ -246,13 +246,21 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {

if (ArchName.startswith("armv")) {
offset = 3;
arch = Triple::arm;
if (ArchName.endswith("eb")) {
arch = Triple::armeb;
ArchName = ArchName.substr(0, ArchName.size() - 2);
} else
arch = Triple::arm;
} else if (ArchName.startswith("armebv")) {
offset = 5;
arch = Triple::armeb;
} else if (ArchName.startswith("thumbv")) {
offset = 5;
arch = Triple::thumb;
if (ArchName.endswith("eb")) {
arch = Triple::thumbeb;
ArchName = ArchName.substr(0, ArchName.size() - 2);
} else
arch = Triple::thumb;
} else if (ArchName.startswith("thumbebv")) {
offset = 7;
arch = Triple::thumbeb;

@@ -271,6 +279,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}

static Triple::ArchType parseArch(StringRef ArchName) {
Triple::ArchType ARMArch(parseARMArch(ArchName));

return StringSwitch<Triple::ArchType>(ArchName)
.Cases("i386", "i486", "i586", "i686", Triple::x86)
// FIXME: Do we need to support these?

@@ -280,9 +290,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("powerpc64le", Triple::ppc64le)
.Case("xscale", Triple::arm)
.StartsWith("arm", parseARMArch(ArchName))
.StartsWith("thumb", parseARMArch(ArchName))
.StartsWith("aarch64", parseARMArch(ArchName))
.Case("xscaleeb", Triple::armeb)
.StartsWith("arm", ARMArch)
.StartsWith("thumb", ARMArch)
.StartsWith("aarch64", ARMArch)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)

@@ -379,6 +390,9 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}

static Triple::SubArchType parseSubArch(StringRef SubArchName) {
if (SubArchName.endswith("eb"))
SubArchName = SubArchName.substr(0, SubArchName.size() - 2);

return StringSwitch<Triple::SubArchType>(SubArchName)
.EndsWith("v8", Triple::ARMSubArch_v8)
.EndsWith("v8a", Triple::ARMSubArch_v8)

@@ -1022,6 +1036,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
offset = 5;
if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb")
offset += 2;
if (MArch.endswith("eb"))
MArch = MArch.substr(0, MArch.size() - 2);
if (offset != StringRef::npos)
result = llvm::StringSwitch<const char *>(MArch.substr(offset))
.Cases("v2", "v2a", "arm2")

@@ -204,6 +204,44 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;

//===----------------------------------------------------------------------===//
// ARM64 Calling Convention for GHC
//===----------------------------------------------------------------------===//

// This calling convention is specific to the Glasgow Haskell Compiler.
// The only documentation is the GHC source code, specifically the C header
// file:
//
// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
//
// which defines the registers for the Spineless Tagless G-Machine (STG) that
// GHC uses to implement lazy evaluation. The generic STG machine has a set of
// registers which are mapped to appropriate set of architecture specific
// registers for each CPU architecture.
//
// The STG Machine is documented here:
//
// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
//
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
// register mapping".

def CC_AArch64_GHC : CallingConv<[
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,

CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,

// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;

// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a

@@ -249,3 +287,4 @@ def CSR_AArch64_AllRegs
(sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
(sequence "Q%u", 0, 31))>;

def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
@@ -302,6 +302,8 @@ static unsigned getImplicitScaleFactor(MVT VT) {
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_AArch64_WebKit_JS;
if (CC == CallingConv::GHC)
return CC_AArch64_GHC;
return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

@@ -215,6 +215,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
DebugLoc DL = MBB.findDebugLoc(MBBI);

// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;

int NumBytes = (int)MFI->getStackSize();
if (!AFI->hasStackFrame()) {
assert(!HasFP && "unexpected function without stack frame but with FP");

@@ -451,6 +456,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = MFI->getStackSize();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;

// Initial and residual are named for consitency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;

@@ -1990,6 +1990,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
llvm_unreachable("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
if (!Subtarget->isTargetDarwin())

@@ -44,6 +44,10 @@ AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
if (MF->getFunction()->getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
else

@@ -52,6 +56,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {

const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_RegMask;
else

@@ -67,7 +74,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
}

const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a

@@ -75,6 +82,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
//
// In case that the calling convention does not use the same register for
// both, the function should return NULL (does not currently apply)
assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
return CSR_AArch64_AAPCS_ThisReturn_RegMask;
}
@@ -317,6 +317,42 @@ public:
MachO::CPU_SUBTYPE_ARM64_ALL);
}

bool doesSectionRequireSymbols(const MCSection &Section) const override {
// Any section for which the linker breaks things into atoms needs to
// preserve symbols, including assembler local symbols, to identify
// those atoms. These sections are:
// Sections of type:
//
// S_CSTRING_LITERALS (e.g. __cstring)
// S_LITERAL_POINTERS (e.g. objc selector pointers)
// S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
//
// Sections named:
//
// __TEXT,__eh_frame
// __TEXT,__ustring
// __DATA,__cfstring
// __DATA,__objc_classrefs
// __DATA,__objc_catlist
//
// FIXME: It would be better if the compiler used actual linker local
// symbols for each of these sections rather than preserving what
// are ostensibly assembler local symbols.
const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
SMO.getType() == MachO::S_4BYTE_LITERALS ||
SMO.getType() == MachO::S_8BYTE_LITERALS ||
SMO.getType() == MachO::S_16BYTE_LITERALS ||
SMO.getType() == MachO::S_LITERAL_POINTERS ||
(SMO.getSegmentName() == "__TEXT" &&
(SMO.getSectionName() == "__eh_frame" ||
SMO.getSectionName() == "__ustring")) ||
(SMO.getSegmentName() == "__DATA" &&
(SMO.getSectionName() == "__cfstring" ||
SMO.getSectionName() == "__objc_classrefs" ||
SMO.getSectionName() == "__objc_catlist")));
}

/// \brief Generate the compact unwind encoding from the CFI directives.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {

@@ -10,7 +10,6 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"

@@ -34,7 +33,7 @@ public:
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;

@@ -113,25 +112,8 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
}
}

static bool canUseLocalRelocation(const MCSectionMachO &Section,
const MCSymbol &Symbol, unsigned Log2Size) {
// Debug info sections can use local relocations.
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
return true;

// Otherwise, only pointer sized relocations are supported.
if (Log2Size != 3)
return false;

// But only if they don't point to a cstring.
if (!Symbol.isInSection())
return true;
const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
return RefSec.getType() != MachO::S_CSTRING_LITERALS;
}

void AArch64MachObjectWriter::RecordRelocation(
MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -141,9 +123,9 @@ void AArch64MachObjectWriter::RecordRelocation(
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
const MCSymbolData *RelSymbol = nullptr;

FixupOffset += Fixup.getOffset();

@@ -189,8 +171,10 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
Index = 0;

if (IsPCRel) {
IsExtern = 1;
Asm.getContext().FatalError(Fixup.getLoc(),
"PC relative absolute relocation!");

@@ -214,12 +198,15 @@ void AArch64MachObjectWriter::RecordRelocation(
Layout.getSymbolOffset(&B_SD) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
Index = A_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)

@@ -265,30 +252,25 @@ void AArch64MachObjectWriter::RecordRelocation(
? 0
: Writer->getSymbolAddress(B_Base, Layout));

Index = A_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;

MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);

RelSymbol = B_Base;
Index = B_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());

bool CanUseLocalRelocation =
canUseLocalRelocation(Section, *Symbol, Log2Size);
if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
Asm.addLocalUsedInReloc(*Symbol);
}

const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
const MCSymbolData *Base = Asm.getAtom(&SD);
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());

// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section) resolve

@@ -328,13 +310,16 @@ void AArch64MachObjectWriter::RecordRelocation(
// sections, and for pointer-sized relocations (.quad), we allow section
// relocations. It's code sections that run into trouble.
if (Base) {
RelSymbol = Base;
Index = Base->getIndex();
IsExtern = 1;

// Add the local offset, if needed.
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection()) {
if (!CanUseLocalRelocation)
// Pointer-sized relocations can use a local relocation. Otherwise,
// we have to be in a debug info section.
if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +

@@ -344,6 +329,7 @@ void AArch64MachObjectWriter::RecordRelocation(
const MCSectionData &SymSD =
Asm.getSectionData(SD.getSymbol().getSection());
Index = SymSD.getOrdinal() + 1;
IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);

if (IsPCRel)

@@ -376,16 +362,16 @@ void AArch64MachObjectWriter::RecordRelocation(

MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);

// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
RelSymbol = nullptr;
IsPCRel = 0;
Log2Size = 2;
IsExtern = 0;

// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;

@@ -397,9 +383,9 @@ void AArch64MachObjectWriter::RecordRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
@@ -567,10 +567,21 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// MOV NewBase, Base
// ADDS NewBase, #imm8.
if (Base != NewBase && Offset >= 8) {
const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
.getSubtarget<ARMSubtarget>();
// Need to insert a MOV to the new base first.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
!Subtarget.hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
return false;
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
.addReg(Base, getKillRegState(BaseKill));
} else
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);

// Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
Base = NewBase;
BaseKill = false;

@@ -9191,27 +9191,39 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is duplicated in getARMFPUFeatures() in
// tools/clang/lib/Driver/Tools.cpp
static const struct {
const unsigned Fpu;
const unsigned ID;
const uint64_t Enabled;
const uint64_t Disabled;
} Fpus[] = {
{ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
{ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
{ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
{ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
{ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::FP_ARMV8, ARM::FeatureFPARMv8,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::NEON, ARM::FeatureNEON, 0},
{ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
{ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
ARM::FeatureCrypto},
{ARM::CRYPTO_NEON_FP_ARMV8,
ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
{ARM::SOFTVFP, 0, 0},
} FPUs[] = {
{ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON},
{ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
ARM::FeatureNEON},
{ARM::VFPV4_D16,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureD16,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0},
{ARM::NEON_VFPV4,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON,
0},
{ARM::NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON,
ARM::FeatureCrypto},
{ARM::CRYPTO_NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
0},
{ARM::SOFTVFP, 0, 0},
};

/// parseDirectiveFPU

@@ -9229,14 +9241,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
return false;
}

for (const auto &Fpu : Fpus) {
if (Fpu.Fpu != ID)
for (const auto &Entry : FPUs) {
if (Entry.ID != ID)
continue;

// Need to toggle features that should be on but are off and that
// should off but are on.
uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
(Fpu.Disabled & STI.getFeatureBits());
uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
(Entry.Disabled & STI.getFeatureBits());
setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
break;
}
@@ -54,10 +54,10 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override;
};
}

@@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MachO::any_relocation_info MRE;

@@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,

@@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MachO::any_relocation_info MRE;

@@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,

@@ -351,10 +351,11 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
}

void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
MCAssembler &Asm,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;

@@ -400,8 +401,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

if (Target.isAbsolute()) { // constant
// FIXME!

@@ -421,7 +422,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Check whether we need an external or internal relocation.
if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
FixedValue)) {
RelSymbol = SD;
IsExtern = 1;
Index = SD->getIndex();

// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was

@@ -445,8 +447,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));

// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.

@@ -471,10 +476,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(MachO::ARM_RELOC_PAIR << 28));

Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
Writer->addRelocation(Fragment->getParent(), MREPair);
}

Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
@@ -15,6 +15,7 @@
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"

@@ -42,7 +43,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineFunctionAnalysis>();
AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved("stack-protector");
AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
}
};

@@ -497,14 +497,14 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
SDValue JumpTarget = Callee;

// T9 should contain the address of the callee function if
// -reloction-model=pic or it is an indirect call.
// -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
unsigned V0Reg = Mips::V0;
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG,
MipsII::MO_GOT, Chain,
FuncInfo->callPtrInfo(S->getSymbol()));
} else

@@ -756,7 +756,7 @@ def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setne f32:$lhs, f32:$rhs),
(NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;

@@ -776,7 +776,7 @@ def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
ISA_MIPS32R6;
def : MipsPat<(setne f64:$lhs, f64:$rhs),
(NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;

@@ -1613,22 +1613,22 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,

if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, Ty, DAG);
return getAddrGPRel(N, SDLoc(N), Ty, DAG);

// %hi/%lo relocation
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
}

if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());

if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
MipsII::MO_GOT_LO16, DAG.getEntryNode(),
MachinePointerInfo::getGOT());

return getAddrGlobal(N, Ty, DAG,
return getAddrGlobal(N, SDLoc(N), Ty, DAG,
(Subtarget.isABI_N32() || Subtarget.isABI_N64())
? MipsII::MO_GOT_DISP
: MipsII::MO_GOT16,

@@ -1642,9 +1642,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,

if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
!Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@@ -1735,9 +1735,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const

if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
!Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@@ -1754,12 +1754,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const

if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, Ty, DAG);
return getAddrGPRel(N, SDLoc(N), Ty, DAG);

return getAddrNonPIC(N, Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
}

return getAddrLocal(N, Ty, DAG,
return getAddrLocal(N, SDLoc(N), Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
}

@@ -2681,15 +2681,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InternalLinkage = Val->hasInternalLinkage();

if (InternalLinkage)
Callee = getAddrLocal(G, Ty, DAG,
Callee = getAddrLocal(G, DL, Ty, DAG,
Subtarget.isABI_N32() || Subtarget.isABI_N64());
else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
IsCallReloc = true;
} else {
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Val));
IsCallReloc = true;
}

@@ -2702,15 +2702,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const char *Sym = S->getSymbol();

if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
|
||||
MipsII::MO_NO_FLAG);
|
||||
Callee =
|
||||
DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG);
|
||||
else if (LargeGOT) {
|
||||
Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
|
||||
Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
|
||||
MipsII::MO_CALL_LO16, Chain,
|
||||
FuncInfo->callPtrInfo(Sym));
|
||||
IsCallReloc = true;
|
||||
} else { // N64 || PIC
|
||||
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
|
||||
Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
|
||||
FuncInfo->callPtrInfo(Sym));
|
||||
IsCallReloc = true;
|
||||
}
|
||||
|
@ -272,9 +272,8 @@ namespace llvm {
|
||||
//
|
||||
// (add (load (wrapper $gp, %got(sym)), %lo(sym))
|
||||
template <class NodeTy>
|
||||
SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
|
||||
SDValue getAddrLocal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
|
||||
bool IsN32OrN64) const {
|
||||
SDLoc DL(N);
|
||||
unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
|
||||
SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
|
||||
getTargetNode(N, Ty, DAG, GOTFlag));
|
||||
@ -291,11 +290,10 @@ namespace llvm {
|
||||
// computing a global symbol's address:
|
||||
//
|
||||
// (load (wrapper $gp, %got(sym)))
|
||||
template<class NodeTy>
|
||||
SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
|
||||
template <class NodeTy>
|
||||
SDValue getAddrGlobal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
|
||||
unsigned Flag, SDValue Chain,
|
||||
const MachinePointerInfo &PtrInfo) const {
|
||||
SDLoc DL(N);
|
||||
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
|
||||
getTargetNode(N, Ty, DAG, Flag));
|
||||
return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
|
||||
@ -305,14 +303,13 @@ namespace llvm {
|
||||
// computing a global symbol's address in large-GOT mode:
|
||||
//
|
||||
// (load (wrapper (add %hi(sym), $gp), %lo(sym)))
|
||||
template<class NodeTy>
|
||||
SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
|
||||
unsigned HiFlag, unsigned LoFlag,
|
||||
SDValue Chain,
|
||||
template <class NodeTy>
|
||||
SDValue getAddrGlobalLargeGOT(NodeTy *N, SDLoc DL, EVT Ty,
|
||||
SelectionDAG &DAG, unsigned HiFlag,
|
||||
unsigned LoFlag, SDValue Chain,
|
||||
const MachinePointerInfo &PtrInfo) const {
|
||||
SDLoc DL(N);
|
||||
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
|
||||
getTargetNode(N, Ty, DAG, HiFlag));
|
||||
SDValue Hi =
|
||||
DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag));
|
||||
Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
|
||||
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
|
||||
getTargetNode(N, Ty, DAG, LoFlag));
|
||||
@ -324,9 +321,9 @@ namespace llvm {
|
||||
// computing a symbol's address in non-PIC mode:
|
||||
//
|
||||
// (add %hi(sym), %lo(sym))
|
||||
template<class NodeTy>
|
||||
SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
|
||||
SDLoc DL(N);
|
||||
template <class NodeTy>
|
||||
SDValue getAddrNonPIC(NodeTy *N, SDLoc DL, EVT Ty,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
|
||||
SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
|
||||
return DAG.getNode(ISD::ADD, DL, Ty,
|
||||
@ -338,9 +335,8 @@ namespace llvm {
|
||||
// computing a symbol's address using gp-relative addressing:
|
||||
//
|
||||
// (add $gp, %gp_rel(sym))
|
||||
template<class NodeTy>
|
||||
SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
|
||||
SDLoc DL(N);
|
||||
template <class NodeTy>
|
||||
SDValue getAddrGPRel(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG) const {
|
||||
assert(Ty == MVT::i32);
|
||||
SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL);
|
||||
return DAG.getNode(ISD::ADD, DL, Ty,
|
||||
|
@ -15,6 +15,7 @@
|
||||
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/StackProtector.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
@ -32,8 +33,8 @@ public:
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<DataLayoutPass>();
|
||||
AU.addPreserved("stack-protector");
|
||||
AU.addPreserved<MachineFunctionAnalysis>();
|
||||
AU.addPreserved<StackProtector>();
|
||||
}
|
||||
|
||||
const char *getPassName() const override {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/StackProtector.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
||||
@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<DataLayoutPass>();
|
||||
AU.addPreserved("stack-protector");
|
||||
AU.addPreserved<MachineFunctionAnalysis>();
|
||||
AU.addPreserved<StackProtector>();
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override;
|
||||
|
@ -41,7 +41,7 @@ public:
|
||||
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
|
||||
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
|
||||
|
||||
void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
|
||||
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
|
||||
const MCAsmLayout &Layout, const MCFragment *Fragment,
|
||||
const MCFixup &Fixup, MCValue Target,
|
||||
uint64_t &FixedValue) override {
|
||||
@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
|
||||
MachO::any_relocation_info MRE;
|
||||
makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
|
||||
Log2Size, IsPCRel, Value2);
|
||||
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
|
||||
Writer->addRelocation(Fragment->getParent(), MRE);
|
||||
} else {
|
||||
// If the offset is more than 24-bits, it won't fit in a scattered
|
||||
// relocation offset field, so we fall back to using a non-scattered
|
||||
@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
|
||||
}
|
||||
MachO::any_relocation_info MRE;
|
||||
makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
|
||||
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
|
||||
Writer->addRelocation(Fragment->getParent(), MRE);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -331,9 +331,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
|
||||
// See <reloc.h>.
|
||||
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
|
||||
unsigned Index = 0;
|
||||
unsigned IsExtern = 0;
|
||||
unsigned Type = RelocType;
|
||||
|
||||
const MCSymbolData *RelSymbol = nullptr;
|
||||
if (Target.isAbsolute()) { // constant
|
||||
// SymbolNum of 0 indicates the absolute section.
|
||||
//
|
||||
@ -355,7 +355,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(
|
||||
|
||||
// Check whether we need an external or internal relocation.
|
||||
if (Writer->doesSymbolRequireExternRelocation(SD)) {
|
||||
RelSymbol = SD;
|
||||
IsExtern = 1;
|
||||
Index = SD->getIndex();
|
||||
// For external relocations, make sure to offset the fixup value to
|
||||
// compensate for the addend of the symbol address, if it was
|
||||
// undefined. This occurs with weak definitions, for example.
|
||||
@ -374,8 +375,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
|
||||
|
||||
// struct relocation_info (8 bytes)
|
||||
MachO::any_relocation_info MRE;
|
||||
makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
|
||||
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
|
||||
makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
|
||||
Type);
|
||||
Writer->addRelocation(Fragment->getParent(), MRE);
|
||||
}
|
||||
|
||||
MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
|
||||
|
@ -77,7 +77,11 @@ extern Target TheGCNTarget;
|
||||
|
||||
namespace AMDGPU {
|
||||
enum TargetIndex {
|
||||
TI_CONSTDATA_START
|
||||
TI_CONSTDATA_START,
|
||||
TI_SCRATCH_RSRC_DWORD0,
|
||||
TI_SCRATCH_RSRC_DWORD1,
|
||||
TI_SCRATCH_RSRC_DWORD2,
|
||||
TI_SCRATCH_RSRC_DWORD3
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
|
||||
"true",
|
||||
"Support flat address space">;
|
||||
|
||||
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||
"EnableVGPRSpilling",
|
||||
"true",
|
||||
"Enable spilling of VGPRs to scratch memory">;
|
||||
|
||||
class SubtargetFeatureFetchLimit <string Value> :
|
||||
SubtargetFeature <"fetch"#Value,
|
||||
"TexVTXClauseSize",
|
||||
|
@ -116,7 +116,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
if (STM.isAmdHsaOS()) {
|
||||
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
|
||||
getSIProgramInfo(KernelInfo, MF);
|
||||
EmitAmdKernelCodeT(MF, KernelInfo);
|
||||
OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
|
||||
@ -421,6 +420,7 @@ static unsigned getRsrcReg(unsigned ShaderType) {
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
|
||||
|
||||
@ -441,6 +441,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
|
||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
|
||||
S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
|
||||
if (STM.isVGPRSpillingEnabled(MFI)) {
|
||||
OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
|
||||
OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
|
||||
}
|
||||
}
|
||||
|
||||
if (MFI->getShaderType() == ShaderType::PIXEL) {
|
||||
@ -504,6 +508,19 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
|
||||
header.wavefront_size = STM.getWavefrontSize();
|
||||
|
||||
const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version",
|
||||
ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly());
|
||||
OutStreamer.SwitchSection(VersionSection);
|
||||
OutStreamer.EmitBytes(Twine("HSA Code Unit:" +
|
||||
Twine(header.hsail_version_major) + "." +
|
||||
Twine(header.hsail_version_minor) + ":" +
|
||||
"AMD:" +
|
||||
Twine(header.amd_code_version_major) + "." +
|
||||
Twine(header.amd_code_version_minor) + ":" +
|
||||
"GFX8.1:0").str());
|
||||
|
||||
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
|
||||
|
||||
if (isVerbose()) {
|
||||
OutStreamer.emitRawComment("amd_code_version_major = " +
|
||||
Twine(header.amd_code_version_major), false);
|
||||
|
@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
N->getValueType(0), Ops);
|
||||
}
|
||||
|
||||
case ISD::LOAD: {
|
||||
// To simplify the TableGen patters, we replace all i64 loads with
|
||||
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
|
||||
// during DAG legalization, however, so places (ExpandUnalignedLoad)
|
||||
// in the DAG legalizer assume that if i64 is legal, so doing this
|
||||
// promotion early can cause problems.
|
||||
EVT VT = N->getValueType(0);
|
||||
LoadSDNode *LD = cast<LoadSDNode>(N);
|
||||
if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
|
||||
break;
|
||||
|
||||
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
|
||||
LD->getBasePtr(), LD->getMemOperand());
|
||||
SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
|
||||
MVT::i64, NewLoad);
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
|
||||
SelectCode(NewLoad.getNode());
|
||||
N = BitCast.getNode();
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPUISD::REGISTER_LOAD: {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
break;
|
||||
@ -962,16 +984,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
|
||||
const SITargetLowering& Lowering =
|
||||
*static_cast<const SITargetLowering*>(getTargetLowering());
|
||||
|
||||
unsigned ScratchPtrReg =
|
||||
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
|
||||
unsigned ScratchOffsetReg =
|
||||
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
|
||||
Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
|
||||
ScratchOffsetReg, MVT::i32);
|
||||
SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
|
||||
SDValue ScratchRsrcDword0 =
|
||||
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
|
||||
|
||||
SDValue ScratchPtr =
|
||||
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
|
||||
MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
|
||||
SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
|
||||
SDValue ScratchRsrcDword1 =
|
||||
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
|
||||
|
||||
const SDValue RsrcOps[] = {
|
||||
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
|
||||
ScratchRsrcDword0,
|
||||
CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
|
||||
ScratchRsrcDword1,
|
||||
CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
|
||||
};
|
||||
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
|
||||
MVT::v2i32, RsrcOps), 0);
|
||||
Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
|
||||
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
|
||||
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
|
||||
@ -988,22 +1021,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
|
||||
}
|
||||
}
|
||||
|
||||
// (add FI, n0)
|
||||
if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
|
||||
isa<FrameIndexSDNode>(Addr.getOperand(0))) {
|
||||
VAddr = Addr.getOperand(1);
|
||||
ImmOffset = Addr.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// (FI)
|
||||
if (isa<FrameIndexSDNode>(Addr)) {
|
||||
VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
|
||||
CurDAG->getConstant(0, MVT::i32)), 0);
|
||||
ImmOffset = Addr;
|
||||
return true;
|
||||
}
|
||||
|
||||
// (node)
|
||||
VAddr = Addr;
|
||||
ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
|
||||
|
@ -187,9 +187,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::i64, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
|
||||
|
||||
|
@ -341,8 +341,39 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
|
||||
// instead.
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
|
||||
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
|
||||
return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
|
||||
enum SISubtarget {
|
||||
SI = 0,
|
||||
VI = 1
|
||||
};
|
||||
|
||||
enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
|
||||
switch (Gen) {
|
||||
default:
|
||||
return SI;
|
||||
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
|
||||
return VI;
|
||||
}
|
||||
}
|
||||
|
||||
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
|
||||
int MCOp = AMDGPU::getMCOpcode(Opcode,
|
||||
AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
|
||||
|
||||
// -1 means that Opcode is already a native instruction.
|
||||
if (MCOp == -1)
|
||||
return Opcode;
|
||||
|
||||
// (uint16_t)-1 means that Opcode is a pseudo instruction that has
|
||||
// no encoding in the given subtarget generation.
|
||||
if (MCOp == (uint16_t)-1)
|
||||
return -1;
|
||||
|
||||
return MCOp;
|
||||
}
|
||||
|
@ -135,6 +135,11 @@ public:
|
||||
bool isRegisterStore(const MachineInstr &MI) const;
|
||||
bool isRegisterLoad(const MachineInstr &MI) const;
|
||||
|
||||
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
|
||||
/// Return -1 if the target-specific opcode for the pseudo instruction does
|
||||
/// not exist. If Opcode is not a pseudo instruction, this is identity.
|
||||
int pseudoToMCOpcode(int Opcode) const;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Pure virtual funtions to be implemented by sub-classes.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
@ -39,29 +40,17 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
|
||||
Ctx(ctx), ST(st)
|
||||
{ }
|
||||
|
||||
enum AMDGPUMCInstLower::SISubtarget
|
||||
AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
|
||||
switch (Gen) {
|
||||
default:
|
||||
return AMDGPUMCInstLower::SI;
|
||||
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
|
||||
return AMDGPUMCInstLower::VI;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {
|
||||
|
||||
int MCOpcode = AMDGPU::getMCOpcode(MIOpcode,
|
||||
AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
|
||||
if (MCOpcode == -1)
|
||||
MCOpcode = MIOpcode;
|
||||
|
||||
return MCOpcode;
|
||||
}
|
||||
|
||||
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
|
||||
|
||||
OutMI.setOpcode(getMCOpcode(MI->getOpcode()));
|
||||
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
|
||||
|
||||
if (MCOpcode == -1) {
|
||||
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
|
||||
C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
|
||||
"a target-specific version: " + Twine(MI->getOpcode()));
|
||||
}
|
||||
|
||||
OutMI.setOpcode(MCOpcode);
|
||||
|
||||
for (const MachineOperand &MO : MI->explicit_operands()) {
|
||||
MCOperand MCOp;
|
||||
@ -91,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
|
||||
MCOp = MCOperand::CreateExpr(Expr);
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_ExternalSymbol: {
|
||||
MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
|
||||
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
|
||||
MCOp = MCOperand::CreateExpr(Expr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OutMI.addOperand(MCOp);
|
||||
}
|
||||
|
@ -19,23 +19,9 @@ class MCContext;
|
||||
class MCInst;
|
||||
|
||||
class AMDGPUMCInstLower {
|
||||
|
||||
// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
|
||||
enum SISubtarget {
|
||||
SI = 0,
|
||||
VI = 1
|
||||
};
|
||||
|
||||
MCContext &Ctx;
|
||||
const AMDGPUSubtarget &ST;
|
||||
|
||||
/// Convert a member of the AMDGPUSubtarget::Generation enum to the
|
||||
/// SISubtarget enum.
|
||||
enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const;
|
||||
|
||||
/// Get the MC opcode for this MachineInstr.
|
||||
unsigned getMCOpcode(unsigned MIOpcode) const;
|
||||
|
||||
public:
|
||||
AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
|
||||
|
||||
|
@ -18,7 +18,9 @@
|
||||
#include "R600MachineScheduler.h"
|
||||
#include "SIISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/CodeGen/MachineScheduler.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -78,6 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
|
||||
FlatAddressSpace(false), EnableIRStructurizer(true),
|
||||
EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||
EnableVGPRSpilling(false),
|
||||
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||
64 * 16, // Maximum stack alignment (long16)
|
||||
@ -113,3 +116,26 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
|
||||
case SEA_ISLANDS: return 12;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPUSubtarget::isVGPRSpillingEnabled(
|
||||
const SIMachineFunctionInfo *MFI) const {
|
||||
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
|
||||
}
|
||||
|
||||
void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
MachineInstr *begin,
|
||||
MachineInstr *end,
|
||||
unsigned NumRegionInstrs) const {
|
||||
if (getGeneration() >= SOUTHERN_ISLANDS) {
|
||||
|
||||
// Track register pressure so the scheduler can try to decrease
|
||||
// pressure once register usage is above the threshold defined by
|
||||
// SIRegisterInfo::getRegPressureSetLimit()
|
||||
Policy.ShouldTrackPressure = true;
|
||||
|
||||
// Enabling both top down and bottom up scheduling seems to give us less
|
||||
// register spills than just using one of these approaches on its own.
|
||||
Policy.OnlyTopDown = false;
|
||||
Policy.OnlyBottomUp = false;
|
||||
}
|
||||
}
|
||||
|
@ -30,6 +30,8 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class SIMachineFunctionInfo;
|
||||
|
||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
|
||||
|
||||
public:
|
||||
@ -63,6 +65,7 @@ private:
|
||||
unsigned WavefrontSize;
|
||||
bool CFALUBug;
|
||||
int LocalMemorySize;
|
||||
bool EnableVGPRSpilling;
|
||||
|
||||
const DataLayout DL;
|
||||
AMDGPUFrameLowering FrameLowering;
|
||||
@ -206,6 +209,10 @@ public:
|
||||
return getGeneration() <= NORTHERN_ISLANDS;
|
||||
}
|
||||
|
||||
void overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
MachineInstr *begin, MachineInstr *end,
|
||||
unsigned NumRegionInstrs) const override;
|
||||
|
||||
// Helper functions to simplify if statements
|
||||
bool isTargetELF() const {
|
||||
return false;
|
||||
@ -224,6 +231,15 @@ public:
|
||||
bool isAmdHsaOS() const {
|
||||
return TargetTriple.getOS() == Triple::AMDHSA;
|
||||
}
|
||||
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
|
||||
|
||||
unsigned getMaxWavesPerCU() const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return 10;
|
||||
|
||||
// FIXME: Not sure what this is for other subtagets.
|
||||
llvm_unreachable("do not know max waves per CU for this subtarget.");
|
||||
}
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -29,7 +29,7 @@ public:
|
||||
const MCAsmLayout &Layout) override {
|
||||
//XXX: Implement if necessary.
|
||||
}
|
||||
void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
|
||||
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
|
||||
const MCFragment *Fragment, const MCFixup &Fixup,
|
||||
MCValue Target, bool &IsPCRel,
|
||||
uint64_t &FixedValue) override {
|
||||
|
@ -163,4 +163,8 @@ namespace SIOutMods {
|
||||
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
|
||||
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||
|
||||
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
|
||||
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
|
||||
InVals.push_back(Val);
|
||||
}
|
||||
|
||||
if (Info->getShaderType() != ShaderType::COMPUTE) {
|
||||
unsigned ScratchIdx = CCInfo.getFirstUnallocated(
|
||||
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
|
||||
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
|
@ -85,49 +85,41 @@ class Enc64 {
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <(outs VCCReg:$dst), ins, asm, pattern> {
|
||||
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
|
||||
let DisableEncoding = "$dst";
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
let VOPC = 1;
|
||||
let VALU = 1;
|
||||
}
|
||||
|
||||
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
|
||||
VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
|
||||
|
||||
let DisableEncoding = "$dst";
|
||||
let VOPC = 1;
|
||||
let Size = 4;
|
||||
}
|
||||
|
||||
class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
let VOP1 = 1;
|
||||
let VALU = 1;
|
||||
let Size = 4;
|
||||
}
|
||||
|
||||
class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
let VOP2 = 1;
|
||||
let VALU = 1;
|
||||
let Size = 4;
|
||||
}
|
||||
|
||||
class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
VOPAnyCommon <outs, ins, asm, pattern> {
|
||||
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let hasSideEffects = 0;
|
||||
let UseNamedOperandTable = 1;
|
||||
// Using complex patterns gives VOP3 patterns a very high complexity rating,
|
||||
// but standalone patterns are almost always prefered, so we need to adjust the
|
||||
// priority lower. The goal is to use a high number to reduce complexity to
|
||||
@ -135,8 +127,6 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
let AddedComplexity = -1000;
|
||||
|
||||
let VOP3 = 1;
|
||||
let VALU = 1;
|
||||
|
||||
int Size = 8;
|
||||
}
|
||||
|
||||
|
@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
|
||||
return AMDGPU::COPY;
|
||||
}
|
||||
|
||||
static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
|
||||
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
// FIXME: Implement spilling for other shader types.
|
||||
return MFI->getShaderType() == ShaderType::COMPUTE;
|
||||
|
||||
}
|
||||
|
||||
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, bool isKill,
|
||||
@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
|
||||
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
|
||||
}
|
||||
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
|
||||
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
|
||||
MFI->setHasSpilledVGPRs();
|
||||
|
||||
switch(RC->getSize() * 8) {
|
||||
@ -482,7 +473,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
.addFrameIndex(FrameIndex)
|
||||
// Place-holder registers, these will be filled in by
|
||||
// SIPrepareScratchRegs.
|
||||
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
} else {
|
||||
LLVMContext &Ctx = MF->getFunction()->getContext();
|
||||
@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
int Opcode = -1;
|
||||
@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
|
||||
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
|
||||
}
|
||||
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
|
||||
} else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
|
||||
switch(RC->getSize() * 8) {
|
||||
case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
|
||||
case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
|
||||
@ -528,7 +520,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
.addFrameIndex(FrameIndex)
|
||||
// Place-holder registers, these will be filled in by
|
||||
// SIPrepareScratchRegs.
|
||||
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
|
||||
} else {
|
||||
@ -615,7 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
|
||||
.addImm(-1)
|
||||
.addImm(0);
|
||||
|
||||
BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
|
||||
BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
|
||||
TIDReg)
|
||||
.addImm(-1)
|
||||
.addReg(TIDReg);
|
||||
@ -1053,7 +1045,11 @@ bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
|
||||
}
|
||||
|
||||
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
|
||||
return AMDGPU::getVOPe32(Opcode) != -1;
|
||||
int Op32 = AMDGPU::getVOPe32(Opcode);
|
||||
if (Op32 == -1)
|
||||
return false;
|
||||
|
||||
return pseudoToMCOpcode(Op32) != -1;
|
||||
}
|
||||
|
||||
bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
|
||||
@ -1126,12 +1122,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
}
|
||||
|
||||
switch (Desc.OpInfo[i].OperandType) {
|
||||
case MCOI::OPERAND_REGISTER: {
|
||||
if (MI->getOperand(i).isImm() &&
|
||||
!isImmOperandLegal(MI, i, MI->getOperand(i))) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
}
|
||||
case MCOI::OPERAND_REGISTER:
|
||||
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case AMDGPU::OPERAND_REG_IMM32:
|
||||
break;
|
||||
case AMDGPU::OPERAND_REG_INLINE_C:
|
||||
if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case MCOI::OPERAND_IMMEDIATE:
|
||||
@ -1287,7 +1289,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
|
||||
case AMDGPU::S_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
|
||||
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
|
||||
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
|
||||
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
|
||||
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
|
||||
}
|
||||
@ -2278,7 +2280,7 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
|
||||
MachineOperand &Dest = Inst->getOperand(0);
|
||||
MachineOperand &Src = Inst->getOperand(1);
|
||||
|
||||
const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
|
||||
const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
|
||||
const TargetRegisterClass *SrcRC = Src.isReg() ?
|
||||
MRI.getRegClass(Src.getReg()) :
|
||||
&AMDGPU::SGPR_32RegClass;
|
||||
|
@ -325,7 +325,6 @@ namespace AMDGPU {
|
||||
int getVOPe32(uint16_t Opcode);
|
||||
int getCommuteRev(uint16_t Opcode);
|
||||
int getCommuteOrig(uint16_t Opcode);
|
||||
int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
||||
int getAddr64Inst(uint16_t Opcode);
|
||||
int getAtomicRetOp(uint16_t Opcode);
|
||||
int getAtomicNoRetOp(uint16_t Opcode);
|
||||
|
@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop {
|
||||
field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
|
||||
}
|
||||
|
||||
// Specify a VOP2 opcode for SI and VOP3 opcode for VI
|
||||
// that doesn't have VOP2 encoding on VI
|
||||
class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
|
||||
let VI3 = vi;
|
||||
}
|
||||
|
||||
class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
|
||||
let SI3 = si;
|
||||
let VI3 = vi;
|
||||
@ -57,7 +63,7 @@ class sopk <bits<5> si, bits<5> vi = si> {
|
||||
}
|
||||
|
||||
// Execpt for the NONE field, this must be kept in sync with the SISubtarget enum
|
||||
// in AMDGPUMCInstLower.h
|
||||
// in AMDGPUInstrInfo.cpp
|
||||
def SISubtarget {
|
||||
int NONE = -1;
|
||||
int SI = 0;
|
||||
@ -731,7 +737,7 @@ class getAsm32 <int NumSrcArgs> {
|
||||
// Returns the assembly string for the inputs and outputs of a VOP3
|
||||
// instruction.
|
||||
class getAsm64 <int NumSrcArgs, bit HasModifiers> {
|
||||
string src0 = "$src0_modifiers,";
|
||||
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
||||
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
||||
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
||||
" $src1_modifiers,"));
|
||||
@ -848,6 +854,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
let isPseudo = 1;
|
||||
}
|
||||
|
||||
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, string revOpSI> {
|
||||
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
|
||||
|
||||
def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
|
||||
VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
|
||||
SIMCInstr <opName#"_e32", SISubtarget.SI>;
|
||||
}
|
||||
|
||||
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, string revOpSI, string revOpVI> {
|
||||
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
|
||||
@ -889,16 +905,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
|
||||
VOP3e_vi <op>,
|
||||
SIMCInstr <opName#"_e64", SISubtarget.VI>;
|
||||
|
||||
// VI only instruction
|
||||
class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, int NumSrcArgs, bit HasMods = 1> :
|
||||
VOP3Common <outs, ins, asm, pattern>,
|
||||
VOP <opName>,
|
||||
VOP3e_vi <op>,
|
||||
VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
|
||||
!if(!eq(NumSrcArgs, 2), 0, 1),
|
||||
HasMods>;
|
||||
|
||||
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
string opName, int NumSrcArgs, bit HasMods = 1> {
|
||||
|
||||
@ -998,6 +1004,23 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
|
||||
}
|
||||
}
|
||||
|
||||
// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
|
||||
multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
|
||||
string asm, list<dag> pattern = []> {
|
||||
let isPseudo = 1 in {
|
||||
def "" : VOPAnyCommon <outs, ins, "", pattern>,
|
||||
SIMCInstr<opName, SISubtarget.NONE>;
|
||||
}
|
||||
|
||||
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
|
||||
SIMCInstr <opName, SISubtarget.SI>;
|
||||
|
||||
def _vi : VOP3Common <outs, ins, asm, []>,
|
||||
VOP3e_vi <op.VI3>,
|
||||
VOP3DisableFields <1, 0, 0>,
|
||||
SIMCInstr <opName, SISubtarget.VI>;
|
||||
}
|
||||
|
||||
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
@ -1089,6 +1112,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
|
||||
revOp, P.HasModifiers
|
||||
>;
|
||||
|
||||
// A VOP2 instruction that is VOP3-only on VI.
|
||||
multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
string revOpSI, string revOpVI, bit HasMods> {
|
||||
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
|
||||
|
||||
defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
|
||||
revOpSI, revOpVI, HasMods>;
|
||||
}
|
||||
|
||||
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOpSI = opName, string revOpVI = revOpSI>
|
||||
: VOP2_VI3_Helper <
|
||||
op, opName, P.Outs,
|
||||
P.Ins32, P.Asm32, [],
|
||||
P.Ins64, P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
revOpSI, revOpVI, P.HasModifiers
|
||||
>;
|
||||
|
||||
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
|
||||
VOPCCommon <ins, "", pattern>,
|
||||
VOP <opName>,
|
||||
@ -1224,34 +1274,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
|
||||
P.NumSrcArgs, P.HasModifiers
|
||||
>;
|
||||
|
||||
class VOP3InstVI <bits<10> op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag> : VOP3_vi <
|
||||
op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64,
|
||||
!if(!eq(P.NumSrcArgs, 3),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
|
||||
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
|
||||
P.Src2VT:$src2))]),
|
||||
!if(!eq(P.NumSrcArgs, 2),
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
|
||||
/* P.NumSrcArgs == 1 */,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
|
||||
P.NumSrcArgs, P.HasModifiers
|
||||
>;
|
||||
|
||||
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP3b_2_m <
|
||||
|
@ -1525,25 +1525,25 @@ defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
|
||||
} // End Uses = [VCC]
|
||||
} // End isCommutable = 1, Defs = [VCC]
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
def V_READLANE_B32 : VOP2 <
|
||||
0x00000001,
|
||||
defm V_READLANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x001, 0x289>,
|
||||
"v_readlane_b32",
|
||||
(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0, SSrc_32:$vsrc1),
|
||||
"v_readlane_b32 $vdst, $src0, $vsrc1",
|
||||
[]
|
||||
"v_readlane_b32 $vdst, $src0, $vsrc1"
|
||||
>;
|
||||
|
||||
def V_WRITELANE_B32 : VOP2 <
|
||||
0x00000002,
|
||||
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x002, 0x28a>,
|
||||
"v_writelane_b32",
|
||||
(outs VGPR_32:$vdst),
|
||||
(ins SReg_32:$src0, SSrc_32:$vsrc1),
|
||||
"v_writelane_b32 $vdst, $src0, $vsrc1",
|
||||
[]
|
||||
"v_writelane_b32 $vdst, $src0, $vsrc1"
|
||||
>;
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
|
||||
VOP_F32_F32_F32
|
||||
@ -1568,30 +1568,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
|
||||
}
|
||||
|
||||
} // End isCommutable = 1
|
||||
} // End let SubtargetPredicate = SICI
|
||||
|
||||
defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32,
|
||||
AMDGPUbfm>;
|
||||
defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
|
||||
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
|
||||
AMDGPUbfm
|
||||
>;
|
||||
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
|
||||
defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
|
||||
defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
|
||||
VOP_F32_F32_I32, AMDGPUldexp
|
||||
>;
|
||||
|
||||
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
|
||||
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
|
||||
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
|
||||
VOP_I32_F32_F32, int_SI_packf16
|
||||
>;
|
||||
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
|
||||
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
|
||||
|
||||
} // End let SubtargetPredicate = SICI
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP3 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1656,9 +1659,6 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
|
||||
// Only on SI
|
||||
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
|
||||
VOP_F32_F32_F32_F32>;
|
||||
defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
|
||||
VOP_F32_F32_F32_F32, AMDGPUfmin3>;
|
||||
|
||||
@ -1699,20 +1699,6 @@ defm V_DIV_FIXUP_F64 : VOP3Inst <
|
||||
|
||||
} // let SchedRW = [WriteDouble]
|
||||
|
||||
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
|
||||
VOP_I64_I64_I32, shl
|
||||
>;
|
||||
|
||||
// Only on SI
|
||||
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
|
||||
VOP_I64_I64_I32, srl
|
||||
>;
|
||||
|
||||
// Only on SI
|
||||
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
|
||||
VOP_I64_I64_I32, sra
|
||||
>;
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
let isCommutable = 1 in {
|
||||
|
||||
@ -1785,6 +1771,26 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
|
||||
|
||||
} // let SchedRW = [WriteDouble]
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
|
||||
VOP_I64_I64_I32, shl
|
||||
>;
|
||||
|
||||
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
|
||||
VOP_I64_I64_I32, srl
|
||||
>;
|
||||
|
||||
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
|
||||
VOP_I64_I64_I32, sra
|
||||
>;
|
||||
|
||||
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
|
||||
VOP_F32_F32_F32_F32>;
|
||||
|
||||
} // End SubtargetPredicate = isSICI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pseudo Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1943,14 +1949,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
|
||||
let UseNamedOperandTable = 1 in {
|
||||
def _SAVE : InstSI <
|
||||
(outs),
|
||||
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
|
||||
(ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
|
||||
SReg_32:$scratch_offset),
|
||||
"", []
|
||||
>;
|
||||
|
||||
def _RESTORE : InstSI <
|
||||
(outs sgpr_class:$dst),
|
||||
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
|
||||
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
|
||||
"", []
|
||||
>;
|
||||
} // End UseNamedOperandTable = 1
|
||||
@ -1966,14 +1972,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
|
||||
let UseNamedOperandTable = 1 in {
|
||||
def _SAVE : InstSI <
|
||||
(outs),
|
||||
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
|
||||
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
|
||||
SReg_32:$scratch_offset),
|
||||
"", []
|
||||
>;
|
||||
|
||||
def _RESTORE : InstSI <
|
||||
(outs vgpr_class:$dst),
|
||||
(ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
|
||||
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
|
||||
"", []
|
||||
>;
|
||||
} // End UseNamedOperandTable = 1
|
||||
@ -2728,16 +2734,12 @@ def : Pat <
|
||||
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
|
||||
>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
|
||||
def : Pat <
|
||||
(int_SI_tid),
|
||||
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
|
||||
(V_MBCNT_HI_U32_B32_e64 0xffffffff,
|
||||
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
|
||||
>;
|
||||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP3 Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
unsigned NumUserSGPRs;
|
||||
std::map<unsigned, unsigned> LaneVGPRs;
|
||||
unsigned LDSWaveSpillSize;
|
||||
unsigned ScratchOffsetReg;
|
||||
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
|
||||
unsigned getTIDReg() const { return TIDReg; };
|
||||
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
|
||||
|
@ -84,28 +84,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
|
||||
Entry->addLiveIn(ScratchOffsetPreloadReg);
|
||||
|
||||
// Load the scratch pointer
|
||||
unsigned ScratchPtrReg =
|
||||
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
|
||||
int ScratchPtrFI = -1;
|
||||
|
||||
if (ScratchPtrReg != AMDGPU::NoRegister) {
|
||||
// Found an SGPR to use.
|
||||
MRI.setPhysRegUsed(ScratchPtrReg);
|
||||
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
|
||||
.addReg(ScratchPtrPreloadReg);
|
||||
} else {
|
||||
// No SGPR is available, we must spill.
|
||||
ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
|
||||
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
|
||||
.addReg(ScratchPtrPreloadReg)
|
||||
.addFrameIndex(ScratchPtrFI);
|
||||
}
|
||||
|
||||
// Load the scratch offset.
|
||||
unsigned ScratchOffsetReg =
|
||||
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
|
||||
int ScratchOffsetFI = ~0;
|
||||
int ScratchOffsetFI = -1;
|
||||
|
||||
if (ScratchOffsetReg != AMDGPU::NoRegister) {
|
||||
// Found an SGPR to use
|
||||
@ -117,7 +99,9 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
||||
ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
|
||||
BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
|
||||
.addReg(ScratchOffsetPreloadReg)
|
||||
.addFrameIndex(ScratchOffsetFI);
|
||||
.addFrameIndex(ScratchOffsetFI)
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
}
|
||||
|
||||
|
||||
@ -125,22 +109,27 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
||||
// add them to all the SI_SPILL_V* instructions.
|
||||
|
||||
RegScavenger RS;
|
||||
bool UseRegScavenger =
|
||||
(ScratchPtrReg == AMDGPU::NoRegister ||
|
||||
ScratchOffsetReg == AMDGPU::NoRegister);
|
||||
unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
|
||||
RS.addScavengingFrameIndex(ScratchRsrcFI);
|
||||
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
if (UseRegScavenger)
|
||||
RS.enterBasicBlock(&MBB);
|
||||
// Add the scratch offset reg as a live-in so that the register scavenger
|
||||
// doesn't re-use it.
|
||||
if (!MBB.isLiveIn(ScratchOffsetReg) &&
|
||||
ScratchOffsetReg != AMDGPU::NoRegister)
|
||||
MBB.addLiveIn(ScratchOffsetReg);
|
||||
RS.enterBasicBlock(&MBB);
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
RS.forward(I);
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
switch(MI.getOpcode()) {
|
||||
default: break;;
|
||||
default: break;
|
||||
case AMDGPU::SI_SPILL_V512_SAVE:
|
||||
case AMDGPU::SI_SPILL_V256_SAVE:
|
||||
case AMDGPU::SI_SPILL_V128_SAVE:
|
||||
@ -153,43 +142,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
|
||||
case AMDGPU::SI_SPILL_V256_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V512_RESTORE:
|
||||
|
||||
// Scratch Pointer
|
||||
if (ScratchPtrReg == AMDGPU::NoRegister) {
|
||||
ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
|
||||
ScratchPtrReg)
|
||||
.addFrameIndex(ScratchPtrFI)
|
||||
.addReg(AMDGPU::NoRegister)
|
||||
.addReg(AMDGPU::NoRegister);
|
||||
} else if (!MBB.isLiveIn(ScratchPtrReg)) {
|
||||
MBB.addLiveIn(ScratchPtrReg);
|
||||
}
|
||||
// Scratch resource
|
||||
unsigned ScratchRsrcReg =
|
||||
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
|
||||
|
||||
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
|
||||
0xffffffff; // Size
|
||||
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
||||
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
|
||||
.addImm(Rsrc & 0xffffffff)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
|
||||
.addImm(Rsrc >> 32)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
// Scratch Offset
|
||||
if (ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
|
||||
ScratchOffsetReg)
|
||||
.addFrameIndex(ScratchOffsetFI)
|
||||
.addReg(AMDGPU::NoRegister)
|
||||
.addReg(AMDGPU::NoRegister);
|
||||
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
|
||||
.addReg(AMDGPU::SGPR0, RegState::Undef);
|
||||
} else if (!MBB.isLiveIn(ScratchOffsetReg)) {
|
||||
MBB.addLiveIn(ScratchOffsetReg);
|
||||
}
|
||||
|
||||
if (ScratchPtrReg == AMDGPU::NoRegister ||
|
||||
if (ScratchRsrcReg == AMDGPU::NoRegister ||
|
||||
ScratchOffsetReg == AMDGPU::NoRegister) {
|
||||
LLVMContext &Ctx = MF.getFunction()->getContext();
|
||||
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
|
||||
ScratchPtrReg = AMDGPU::SGPR0;
|
||||
ScratchRsrcReg = AMDGPU::SGPR0;
|
||||
ScratchOffsetReg = AMDGPU::SGPR0;
|
||||
}
|
||||
MI.getOperand(2).setReg(ScratchPtrReg);
|
||||
MI.getOperand(2).setReg(ScratchRsrcReg);
|
||||
MI.getOperand(2).setIsKill(true);
|
||||
MI.getOperand(2).setIsUndef(false);
|
||||
MI.getOperand(3).setReg(ScratchOffsetReg);
|
||||
MI.getOperand(3).setIsUndef(false);
|
||||
MI.getOperand(3).setIsKill(false);
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
|
||||
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
|
||||
|
||||
break;
|
||||
}
|
||||
if (UseRegScavenger)
|
||||
RS.forward();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -23,7 +23,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#include "llvm/Support/Debug.h"
using namespace llvm;

SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)

@ -51,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}

unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
return RC->getNumRegs();
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {

// FIXME: We should adjust the max number of waves based on LDS size.
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());

for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {

unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
unsigned Limit;

if (isSGPRClass(*I)) {
Limit = SGPRLimit / NumSubRegs;
} else {
Limit = VGPRLimit / NumSubRegs;
}

const int *Sets = getRegClassPressureSets(*I);
assert(Sets);
for (unsigned i = 0; Sets[i] != -1; ++i) {
if (Sets[i] == (int)Idx)
return Limit;
}
}
return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {

@ -98,7 +120,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
unsigned Value,
unsigned ScratchPtr,
unsigned ScratchRsrcReg,
unsigned ScratchOffset,
int64_t Offset,
RegScavenger *RS) const {

@ -113,33 +135,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
bool RanOutOfSGPRs = false;
unsigned SOffset = ScratchOffset;

unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
if (RsrcReg == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
}

unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;

uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
getSubReg(RsrcReg, AMDGPU::sub0_sub1))
.addReg(ScratchPtr)
.addReg(RsrcReg, RegState::ImplicitDefine);

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub2))
.addImm(Rsrc & 0xffffffff)
.addReg(RsrcReg, RegState::ImplicitDefine);

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
getSubReg(RsrcReg, AMDGPU::sub3))
.addImm(Rsrc >> 32)
.addReg(RsrcReg, RegState::ImplicitDefine);

if (!isUInt<12>(Offset + Size)) {
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
if (SOffset == AMDGPU::NoRegister) {

@ -163,9 +161,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,

BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(RsrcReg, getKillRegState(IsKill))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
.addImm(Offset)
.addReg(SOffset, getKillRegState(IsKill))
.addReg(SOffset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe

@ -235,9 +233,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}

if (isM0) {
if (isM0)
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
}

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(Spill.VGPR)

@ -262,7 +259,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V32_SAVE:
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();

@ -274,7 +271,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V512_RESTORE: {
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();

@ -289,7 +286,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false);
FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}
}

@ -446,6 +443,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::TGID_Z:
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
if (MFI->getShaderType() != ShaderType::COMPUTE)
return MFI->ScratchOffsetReg;
return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;

@ -475,3 +474,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
switch(WaveCount) {
case 10: return 24;
case 9: return 28;
case 8: return 32;
case 7: return 36;
case 6: return 40;
case 5: return 48;
case 4: return 64;
case 3: return 84;
case 2: return 128;
default: return 256;
}
}

unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
switch(WaveCount) {
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return 103;
}
}

@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H

#include "AMDGPURegisterInfo.h"
#include "llvm/Support/Debug.h"

namespace llvm {

@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {

BitVector getReservedRegs(const MachineFunction &MF) const override;

unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
unsigned getRegPressureSetLimit(unsigned Idx) const override;

bool requiresRegisterScavenging(const MachineFunction &Fn) const override;

@ -105,13 +105,21 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;

/// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
/// concurrent waves.
unsigned getNumVGPRsAllowed(unsigned WaveCount) const;

/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
unsigned getNumSGPRsAllowed(unsigned WaveCount) const;

unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;

private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
unsigned ScratchPtr, unsigned ScratchOffset,
unsigned ScratchRsrcReg, unsigned ScratchOffset,
int64_t Offset, RegScavenger *RS) const;
};

@ -10,6 +10,7 @@
//

#include "AMDGPU.h"
#include "AMDGPUMCInstLower.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"

@ -206,13 +207,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}

int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

// Op32 could be -1 here if we started with an instruction that had a
// getVOPe32 could be -1 here if we started with an instruction that had
// a 32-bit encoding and then commuted it to an instruction that did not.
if (Op32 == -1)
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;

int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {

@ -11,22 +11,6 @@

let SubtargetPredicate = isVI in {

def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32,
AMDGPUldexp
>;
def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>;
def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32",
VOP_I32_I32_I32
>;
def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32",
VOP_I32_I32_I32
>;

def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;

defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
0x14, "buffer_load_dword", VGPR_32, i32, global_load
>;

@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <

} // End SubtargetPredicate = isVI

//===----------------------------------------------------------------------===//
// VOP2 Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isVI] in {

def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32 0xffffffff,
(V_MBCNT_LO_U32_B32 0xffffffff, 0))
>;

//===----------------------------------------------------------------------===//
// SMEM Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isVI] in {

// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),

@ -42,7 +42,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;

UseIntegratedAssembler = true;
if (TheTriple.isOSSolaris() || TheTriple.isOSOpenBSD())
UseIntegratedAssembler = true;
}

const MCExpr*

@ -777,6 +777,19 @@ public:
MachO::CPU_TYPE_X86_64, Subtype);
}

bool doesSectionRequireSymbols(const MCSection &Section) const override {
// Temporary labels in the string literals sections require symbols. The
// issue is that the x86_64 relocation format does not allow symbol +
// offset, and so the linker does not have enough information to resolve the
// access to the appropriate atom unless an external relocation is used. For
// non-cstring sections, we expect the compiler to use a non-temporary label
// for anything that could have an addend pointing outside the symbol.
//
// See <rdar://problem/4765733>.
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return SMO.getType() == MachO::S_CSTRING_LITERALS;
}

/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {

@ -222,6 +222,9 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
case MCSymbolRefExpr::VK_GOT:
Type = ELF::R_386_GOT32;
break;
case MCSymbolRefExpr::VK_PLT:
Type = ELF::R_386_PLT32;
break;
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_386_GOTOFF;
break;

@ -10,7 +10,6 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"

@ -48,21 +47,23 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm,
void RecordX86_64Relocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue);

const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
public:
X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}

void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override {
if (Writer->is64Bit())
RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);

@ -96,10 +97,13 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}

void X86MachObjectWriter::RecordX86_64Relocation(
MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());

@ -113,7 +117,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

Value = Target.getConstant();

@ -129,6 +132,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
Type = MachO::X86_64_RELOC_UNSIGNED;
Index = 0;

// FIXME: I believe this is broken, I don't think the linker can understand
// it. I think it would require a local relocation, but I'm not sure if that

@ -189,30 +193,36 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Value -= Writer->getSymbolAddress(&B_SD, Layout) -
(!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));

if (!A_Base)
if (A_Base) {
Index = A_Base->getIndex();
IsExtern = 1;
} else {
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
Type = MachO::X86_64_RELOC_UNSIGNED;

MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);

if (B_Base)
RelSymbol = B_Base;
else
if (B_Base) {
Index = B_Base->getIndex();
IsExtern = 1;
} else {
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
if (Symbol->isTemporary() && Value) {
const MCSection &Sec = Symbol->getSection();
if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
Asm.addLocalUsedInReloc(*Symbol);
}
const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
RelSymbol = Asm.getAtom(&SD);
const MCSymbolData *Base = Asm.getAtom(&SD);

// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully

@ -222,20 +232,23 @@ void X86MachObjectWriter::RecordX86_64Relocation(
const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
Fragment->getParent()->getSection());
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
RelSymbol = nullptr;
Base = nullptr;
}

// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
// non-local symbol).
if (RelSymbol) {
if (Base) {
Index = Base->getIndex();
IsExtern = 1;

// Add the local offset, if needed.
if (RelSymbol != &SD)
Value +=
Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(RelSymbol);
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);

if (IsPCRel)

@ -334,9 +347,12 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}

bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,

@ -408,7 +424,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered

@ -430,7 +446,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
Writer->addRelocation(Fragment->getParent(), MRE);
return true;
}

@ -451,6 +467,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,

// Get the symbol data.
const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
unsigned Index = SD_A->getIndex();

// We're only going to have a second symbol in pic mode and it'll be a
// subtraction from the picbase. For 32-bit pic the addend is the difference

@ -473,9 +490,12 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = Value;
MRE.r_word1 =
(IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
Writer->addRelocation(SD_A, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(1 << 27) | // r_extern
(MachO::GENERIC_RELOC_TLV << 28)); // r_type
Writer->addRelocation(Fragment->getParent(), MRE);
}

void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,

@ -526,8 +546,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
const MCSymbolData *RelSymbol = nullptr;

if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.

@ -548,7 +568,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,

// Check whether we need an external or internal relocation.
if (Writer->doesSymbolRequireExternRelocation(SD)) {
RelSymbol = SD;
IsExtern = 1;
Index = SD->getIndex();
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.

@ -570,9 +591,12 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 =
(Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
MRE.r_word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,

@ -1376,6 +1376,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
// The prototype of thunks are a lie, don't try to directly call such
// functions.
if (Callee->hasFnAttribute("thunk"))
return false;
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();

@ -2182,9 +2182,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,

// Handle the floating point versions of equality comparisons too.
if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE))
Worklist.push_back(std::make_pair(Op0, Op1));

(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
// Floating point -0.0 and 0.0 compare equal, so we can't
// propagate a constant based on that comparison.
// FIXME: We should do this optimization if 'no signed zeros' is
// applicable via an instruction-level fast-math-flag or some other
// indicator that relaxed FP semantics are being used.
if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
Worklist.push_back(std::make_pair(Op0, Op1));
}

// If "A >= B" is known true, replace "A < B" with false everywhere.
CmpInst::Predicate NotPred = Cmp->getInversePredicate();
Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);

@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

@ -47,7 +48,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
};

@ -46,7 +46,6 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}

@ -278,9 +278,8 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {

// Currently we only handle instructions of the form "add <indvar> <value>"
// and "sub <indvar> <value>".
unsigned Op = BO->getOpcode();
if (!(Op == Instruction::Add || Op == Instruction::Sub))
if (Op != Instruction::Add)
return false;

// If BO is already both nuw and nsw then there is nothing left to do

@ -304,15 +303,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
if (OtherOpSCEV == SE->getCouldNotCompute())
return false;

if (Op == Instruction::Sub) {
// If the subtraction is of the form "sub <indvar>, <op>", then pretend it
// is "add <indvar>, -<op>" and continue, else bail out.
if (OtherOperandIdx != 1)
return false;

OtherOpSCEV = SE->getNegativeSCEV(OtherOpSCEV);
}

const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);

@ -1968,8 +1968,12 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
// Try to further simplify the result.
CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI);
if (SimplifiedCI && SimplifiedCI->getCalledFunction())
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder))
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
SimplifiedCI->eraseFromParent();
return V;
}
return SimplifiedFortifiedCI;
}

@ -2218,11 +2222,11 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &
return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) {
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
LibFunc::Func Func =
Name.startswith("str") ? LibFunc::strcpy_chk : LibFunc::stpcpy_chk;

if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
return nullptr;

@ -2231,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
*ObjSize = CI->getArgOperand(2);

// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (!OnlyLowerUnknownSize && Dst == Src) {
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}

@ -2266,11 +2270,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &
return nullptr;
}

Value *FortifiedLibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
LibFunc::Func Func =
Name.startswith("str") ? LibFunc::strncpy_chk : LibFunc::stpncpy_chk;

if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
return nullptr;

@ -2310,10 +2314,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizeMemSetChk(CI, Builder);
case LibFunc::stpcpy_chk:
case LibFunc::strcpy_chk:
return optimizeStrCpyChk(CI, Builder);
return optimizeStrpCpyChk(CI, Builder, Func);
case LibFunc::stpncpy_chk:
case LibFunc::strncpy_chk:
return optimizeStrNCpyChk(CI, Builder);
return optimizeStrpNCpyChk(CI, Builder, Func);
default:
break;
}

@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file",

namespace llvm {
namespace SymbolRewriter {
void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();

Comdat *C = M.getOrInsertComdat(Target);
C->setSelectionKind(CD->getSelectionKind());
GO->setComdat(C);

Comdats.erase(Comdats.find(Source));
}
}

template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {

@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType,
bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
bool Changed = false;
if (ValueType *S = (M.*Get)(Source)) {
if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
rewriteComdat(M, GO, Source, Target);

if (Value *T = (M.*Get)(Target))
S->setValueName(T->getValueName());
else
S->setName(Target);

Changed = true;
}
return Changed;

@ -145,6 +162,12 @@ performOnModule(Module &M) {
report_fatal_error("unable to transforn " + C.getName() + " in " +
M.getModuleIdentifier() + ": " + Error);

if (C.getName() == Name)
continue;

if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
rewriteComdat(M, GO, C.getName(), Name);

if (Value *V = (M.*Get)(Name))
C.setValueName(V->getValueName());
else

@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}

@ -1,11 +1,11 @@
This is a set of individual patches, which contain all the customizations to
llvm/clang currently in the FreeBSD base system. These can be applied in
alphabetical order to a pristine llvm/clang 3.6.0 release source tree, for
example by doing:
alphabetical order to a pristine llvm/clang 3.6.0 RC2 source tree, for example
by doing:

svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_351/rc1 llvm-3.6.0-rc1
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_351/rc1 llvm-3.6.0-rc1/tools/clang
cd llvm-3.6.0-rc1
svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_351/rc2 llvm-3.6.0-rc2
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_351/rc2 llvm-3.6.0-rc2/tools/clang
cd llvm-3.6.0-rc2
for p in /usr/src/contrib/llvm/patches/patch-*.diff; do
patch -p0 -f -F0 -E -i $p -s || break
done

@ -1,83 +0,0 @@
Pull in r226664 from upstream llvm trunk (by Tim Northover):

AArch64: add backend option to reserve x18 (platform register)

AAPCS64 says that it's up to the platform to specify whether x18 is
reserved, and a first step on that way is to add a flag controlling
it.

From: Andrew Turner <andrew@fubar.geek.nz>

Introduced here: http://svnweb.freebsd.org/changeset/base/277774

Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"

+static cl::opt<bool>
+ReserveX18("aarch64-reserve-x18", cl::Hidden,
+          cl::desc("Reserve X18, making it unavailable as GPR"));
+
AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
const AArch64Subtarget *sti)
: AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
@@ -90,7 +94,7 @@ AArch64RegisterInfo::getReservedRegs(const Machine
Reserved.set(AArch64::W29);
}

- if (STI->isTargetDarwin()) {
+ if (STI->isTargetDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -117,7 +121,7 @@ bool AArch64RegisterInfo::isReservedReg(const Mach
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin();
+ return STI->isTargetDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
return TFI->hasFP(MF) || STI->isTargetDarwin();
@@ -379,7 +383,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(
case AArch64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - STI->isTargetDarwin() // X18 reserved as platform register
+ - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
Index: test/CodeGen/AArch64/arm64-platform-reg.ll
===================================================================
--- test/CodeGen/AArch64/arm64-platform-reg.ll
+++ test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
+; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s

; x18 is reserved as a platform register on Darwin but not on other
@@ -16,11 +17,11 @@ define void @keep_live() {
; CHECK: ldr x18
; CHECK: str x18

-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: Spill
-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: ret
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: Spill
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: ret
ret void
}

@ -1,53 +0,0 @@
Pull in r227062 from upstream clang trunk (by Renato Golin):

Allows Clang to use LLVM's fixes-x18 option

This patch allows clang to have llvm reserve the x18
platform register on AArch64. FreeBSD will use this in the kernel for
per-cpu data but has no need to reserve this register in userland so
will need this flag to reserve it.

This uses llvm r226664 to allow this register to be reserved.

Patch by Andrew Turner.

Introduced here: http://svnweb.freebsd.org/changeset/base/277775

Index: tools/clang/include/clang/Driver/Options.td
===================================================================
--- tools/clang/include/clang/Driver/Options.td
+++ tools/clang/include/clang/Driver/Options.td
@@ -1209,6 +1209,8 @@ def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cor
def mno_fix_cortex_a53_835769 : Flag<["-"], "mno-fix-cortex-a53-835769">,
Group<m_aarch64_Features_Group>,
HelpText<"Don't workaround Cortex-A53 erratum 835769 (AArch64 only)">;
+def ffixed_x18 : Flag<["-"], "ffixed-x18">, Group<m_aarch64_Features_Group>,
+  HelpText<"Reserve the x18 register (AArch64 only)">;

def mvsx : Flag<["-"], "mvsx">, Group<m_ppc_Features_Group>;
def mno_vsx : Flag<["-"], "mno-vsx">, Group<m_ppc_Features_Group>;
Index: tools/clang/lib/Driver/Tools.cpp
===================================================================
--- tools/clang/lib/Driver/Tools.cpp
+++ tools/clang/lib/Driver/Tools.cpp
@@ -958,6 +958,11 @@ void Clang::AddAArch64TargetArgs(const ArgList &Ar
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-mno-global-merge");
}
+
+  if (Args.hasArg(options::OPT_ffixed_x18)) {
+    CmdArgs.push_back("-backend-option");
+    CmdArgs.push_back("-aarch64-reserve-x18");
+  }
}

// Get CPU and ABI names. They are not independent
Index: tools/clang/test/Driver/aarch64-fixed-x18.c
===================================================================
--- tools/clang/test/Driver/aarch64-fixed-x18.c
+++ tools/clang/test/Driver/aarch64-fixed-x18.c
@@ -0,0 +1,4 @@
+// RUN: %clang -target aarch64-none-gnu -ffixed-x18 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-X18 < %t %s
+
+// CHECK-FIXED-X18: "-backend-option" "-aarch64-reserve-x18"

@ -224,14 +224,12 @@ class SubjectList<list<AttrSubject> subjects, SubjectDiag diag = WarnDiag,
string CustomDiag = customDiag;
}

class LangOpt<string name, bit negated = 0> {
class LangOpt<string name> {
string Name = name;
bit Negated = negated;
}
def MicrosoftExt : LangOpt<"MicrosoftExt">;
def Borland : LangOpt<"Borland">;
def CUDA : LangOpt<"CUDA">;
def COnly : LangOpt<"CPlusPlus", 1>;

// Defines targets for target-specific attributes. The list of strings should
// specify architectures for which the target applies, based off the ArchType

@ -709,25 +707,6 @@ def MinSize : InheritableAttr {
let Documentation = [Undocumented];
}

def FlagEnum : InheritableAttr {
let Spellings = [GNU<"flag_enum">];
let Subjects = SubjectList<[Enum]>;
let Documentation = [FlagEnumDocs];
let LangOpts = [COnly];
let AdditionalMembers = [{
private:
llvm::APInt FlagBits;
public:
llvm::APInt &getFlagBits() {
return FlagBits;
}

const llvm::APInt &getFlagBits() const {
return FlagBits;
}
}];
}

def Flatten : InheritableAttr {
let Spellings = [GCC<"flatten">];
let Subjects = SubjectList<[Function], ErrorDiag>;

@ -1196,16 +1196,6 @@ behavior of the program is undefined.
}];
}

def FlagEnumDocs : Documentation {
let Category = DocCatType;
let Content = [{
This attribute can be added to an enumerator to signal to the compiler that it
is intended to be used as a flag type. This will cause the compiler to assume
that the range of the type includes all of the values that you can get by
manipulating bits of the enumerator when issuing warnings.
}];
}

def MSInheritanceDocs : Documentation {
let Category = DocCatType;
let Heading = "__single_inhertiance, __multiple_inheritance, __virtual_inheritance";

@ -190,7 +190,6 @@ def OverloadedShiftOpParentheses: DiagGroup<"overloaded-shift-op-parentheses">;
def DanglingElse: DiagGroup<"dangling-else">;
def DanglingField : DiagGroup<"dangling-field">;
def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">;
def FlagEnum : DiagGroup<"flag-enum">;
def InfiniteRecursion : DiagGroup<"infinite-recursion">;
def GNUImaginaryConstant : DiagGroup<"gnu-imaginary-constant">;
def IgnoredQualifiers : DiagGroup<"ignored-qualifiers">;

@ -2236,7 +2236,7 @@ def warn_attribute_wrong_decl_type : Warning<
"%0 attribute only applies to %select{functions|unions|"
"variables and functions|functions and methods|parameters|"
"functions, methods and blocks|functions, methods, and classes|"
"functions, methods, and parameters|classes|enums|variables|methods|"
"functions, methods, and parameters|classes|variables|methods|"
"variables, functions and labels|fields and global variables|structs|"
"variables and typedefs|thread-local variables|"
"variables and fields|variables, data members and tag types|"

@ -4059,9 +4059,6 @@ def ext_enum_too_large : ExtWarn<
def ext_enumerator_increment_too_large : ExtWarn<
"incremented enumerator value %0 is not representable in the "
"largest integer type">, InGroup<EnumTooLarge>;
def warn_flag_enum_constant_out_of_range : Warning<
"enumeration value %0 is out of range of flags in enumeration type %1">,
InGroup<FlagEnum>;

def warn_illegal_constant_array_size : Extension<
"size of static array must be an integer constant expression">;

@ -6162,6 +6159,8 @@ let CategoryName = "Inline Assembly Issue" in {
"invalid lvalue in asm input for constraint '%0'">;
def err_asm_invalid_input_constraint : Error<
"invalid input constraint '%0' in asm">;
def err_asm_immediate_expected : Error<"constraint '%0' expects "
"an integer constant expression">;
def err_asm_invalid_type_in_input : Error<
"invalid type %0 in asm input for constraint '%1'">;
def err_asm_tying_incompatible_types : Error<

@ -822,7 +822,6 @@ enum AttributeDeclKind {
ExpectedFunctionMethodOrClass,
ExpectedFunctionMethodOrParameter,
ExpectedClass,
ExpectedEnum,
ExpectedVariable,
ExpectedMethod,
ExpectedVariableFunctionOrLabel,

@ -7971,12 +7971,6 @@ public:
Expr *SrcExpr, AssignmentAction Action,
bool *Complained = nullptr);

/// IsValueInFlagEnum - Determine if a value is allowed as part of a flag
/// enum. If AllowMask is true, then we also allow the complement of a valid
/// value, to be used as a mask.
bool IsValueInFlagEnum(const EnumDecl *ED, const llvm::APInt &Val,
bool AllowMask) const;

/// DiagnoseAssignmentEnum - Warn if assignment to enum is a constant
/// integer not in the range of enum values.
void DiagnoseAssignmentEnum(QualType DstType, QualType SrcType,

@ -101,7 +101,7 @@ public:
DeclContext *MemberContext,
bool EnteringContext)
: Typo(TypoName.getName().getAsIdentifierInfo()), CurrentTCIndex(0),
SemaRef(SemaRef), S(S),
SavedTCIndex(0), SemaRef(SemaRef), S(S),
SS(SS ? llvm::make_unique<CXXScopeSpec>(*SS) : nullptr),
CorrectionValidator(std::move(CCC)), MemberContext(MemberContext),
Result(SemaRef, TypoName, LookupKind),

@ -187,6 +187,17 @@ public:
CurrentTCIndex >= ValidatedCorrections.size();
}

/// \brief Save the current position in the correction stream (overwriting any
/// previously saved position).
void saveCurrentPosition() {
SavedTCIndex = CurrentTCIndex;
}

/// \brief Restore the saved position in the correction stream.
void restoreSavedPosition() {
CurrentTCIndex = SavedTCIndex;
}

ASTContext &getContext() const { return SemaRef.Context; }
const LookupResult &getLookupResult() const { return Result; }

@ -267,6 +278,7 @@ private:

SmallVector<TypoCorrection, 4> ValidatedCorrections;
size_t CurrentTCIndex;
size_t SavedTCIndex;

Sema &SemaRef;
Scope *S;

@ -5901,6 +5901,8 @@ public:
: MipsTargetInfoBase(Triple, "o32", "mips32r2") {
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
Int64Type = SignedLongLong;
IntMaxType = Int64Type;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
}
bool setABI(const std::string &Name) override {

@ -5922,6 +5924,8 @@ public:
Builder.defineMacro("__mips_isa_rev", "1");
else if (CPUStr == "mips32r2")
Builder.defineMacro("__mips_isa_rev", "2");
else if (CPUStr == "mips32r6")
Builder.defineMacro("__mips_isa_rev", "6");

if (ABI == "o32") {
Builder.defineMacro("__mips_o32");

@ -6028,6 +6032,8 @@ public:
PointerWidth = PointerAlign = 64;
SizeType = UnsignedLong;
PtrDiffType = SignedLong;
Int64Type = SignedLong;
IntMaxType = Int64Type;
}

void setN32ABITypes() {

@ -6035,6 +6041,8 @@ public:
PointerWidth = PointerAlign = 32;
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
Int64Type = SignedLongLong;
IntMaxType = Int64Type;
}

bool setABI(const std::string &Name) override {

@ -6065,6 +6073,8 @@ public:
Builder.defineMacro("__mips_isa_rev", "1");
else if (CPUStr == "mips64r2")
Builder.defineMacro("__mips_isa_rev", "2");
else if (CPUStr == "mips64r6")
Builder.defineMacro("__mips_isa_rev", "6");

if (ABI == "n32") {
Builder.defineMacro("__mips_n32");

@ -206,9 +206,6 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
setGlobalVisibility(GV, &D);

if (supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));

if (D.getTLSKind())
setTLSMode(GV, D);

@ -267,7 +267,15 @@ void CodeGenModule::EmitPointerToInitFunc(const VarDecl *D,
addUsedGlobal(PtrArray);

// If the GV is already in a comdat group, then we have to join it.
if (llvm::Comdat *C = GV->getComdat())
llvm::Comdat *C = GV->getComdat();

// LinkOnce and Weak linkage are lowered down to a single-member comdat group.
// Make an explicit group so we can join it.
if (!C && (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())) {
C = TheModule.getOrInsertComdat(GV->getName());
GV->setComdat(C);
}
if (C)
PtrArray->setComdat(C);
}

@ -1928,31 +1928,6 @@ void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D,
R.first->second = nullptr;
}

static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) {
if (!CGM.supportsCOMDAT())
return false;

if (D.hasAttr<SelectAnyAttr>())
return true;

GVALinkage Linkage;
if (auto *VD = dyn_cast<VarDecl>(&D))
Linkage = CGM.getContext().GetGVALinkageForVariable(VD);
else
Linkage = CGM.getContext().GetGVALinkageForFunction(cast<FunctionDecl>(&D));

switch (Linkage) {
case GVA_Internal:
case GVA_AvailableExternally:
case GVA_StrongExternal:
return false;
case GVA_DiscardableODR:
case GVA_StrongODR:
return true;
}
llvm_unreachable("No such linkage");
}

void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
llvm::Constant *Init = nullptr;
QualType ASTTy = D->getType();

@ -2096,9 +2071,6 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
setTLSMode(GV, *D);
}

if (shouldBeInCOMDAT(*this, *D))
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));

// Emit the initializer function if necessary.
if (NeedsGlobalCtor || NeedsGlobalDtor)
EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor);

@ -2433,9 +2405,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,

MaybeHandleStaticInExternC(D, Fn);

if (shouldBeInCOMDAT(*this, *D))
Fn->setComdat(TheModule.getOrInsertComdat(Fn->getName()));

CodeGenFunction(*this).GenerateCode(D, Fn, FI);

setFunctionDefinitionAttributes(D, Fn);

@ -1711,12 +1711,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,

// The ABI says: It is suggested that it be emitted in the same COMDAT group
// as the associated data object
llvm::Comdat *C = var->getComdat();
if (!D.isLocalVarDecl() && C) {
if (!D.isLocalVarDecl() && var->isWeakForLinker() && CGM.supportsCOMDAT()) {
llvm::Comdat *C = CGM.getModule().getOrInsertComdat(var->getName());
guard->setComdat(C);
var->setComdat(C);
CGF.CurFn->setComdat(C);
} else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
}

CGM.setStaticLocalDeclGuardAddress(&D, guard);

@ -1534,6 +1534,12 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn);
CGM.SetLLVMFunctionAttributesForDefinition(MD, ThunkFn);

// Add the "thunk" attribute so that LLVM knows that the return type is
// meaningless. These thunks can be used to call functions with differing
// return types, and the caller is required to cast the prototype
// appropriately to extract the correct value.
ThunkFn->addFnAttr("thunk");

// These thunks can be compared, so they are not unnamed.
ThunkFn->setUnnamedAddr(false);

@ -1829,10 +1835,18 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
llvm::Function *F = CXXThreadLocalInits[I];

// If the GV is already in a comdat group, then we have to join it.
if (llvm::Comdat *C = GV->getComdat())
llvm::Comdat *C = GV->getComdat();

// LinkOnce and Weak linkage are lowered down to a single-member comdat
// group.
// Make an explicit group so we can join it.
if (!C && (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())) {
C = CGM.getModule().getOrInsertComdat(GV->getName());
GV->setComdat(C);
AddToXDU(F)->setComdat(C);
else
} else {
NonComdatInits.push_back(F);
}
}

if (!NonComdatInits.empty()) {

@ -5423,6 +5423,20 @@ const char *arm::getLLVMArchSuffixForARM(StringRef CPU) {
.Default("");
}

void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs, const llvm::Triple &Triple) {
if (Args.hasArg(options::OPT_r))
return;

StringRef Suffix = getLLVMArchSuffixForARM(getARMCPUForMArch(Args, Triple));
const char *LinkFlag = llvm::StringSwitch<const char *>(Suffix)
.Cases("v4", "v4t", "v5", "v5e", nullptr)
.Cases("v6", "v6t2", nullptr)
.Default("--be8");

if (LinkFlag)
CmdArgs.push_back(LinkFlag);
}

bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
return A && (A->getValue() == StringRef(Value));

@ -6897,6 +6911,7 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
break;
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getTriple());
CmdArgs.push_back("-m");
switch (getToolChain().getTriple().getEnvironment()) {
case llvm::Triple::EABI:

@ -7447,6 +7462,10 @@ void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("-s");

if (ToolChain.getArch() == llvm::Triple::armeb ||
ToolChain.getArch() == llvm::Triple::thumbeb)
arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getTriple());

for (const auto &Opt : ToolChain.ExtraOpts)
CmdArgs.push_back(Opt.c_str());

@ -228,6 +228,8 @@ namespace arm {
const char* getARMCPUForMArch(const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
const char* getLLVMArchSuffixForARM(StringRef CPU);

void appendEBLinkFlags(const llvm::opt::ArgList &Args, ArgStringList &CmdArgs, const llvm::Triple &Triple);
}

namespace mips {

@ -1340,6 +1340,11 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
<< IsC11 << Ptr->getType() << Ptr->getSourceRange();
return ExprError();
}
if (IsC11 && ValType->isPointerType() &&
RequireCompleteType(Ptr->getLocStart(), ValType->getPointeeType(),
diag::err_incomplete_type)) {
return ExprError();
}
} else if (IsN && !ValType->isIntegerType() && !ValType->isPointerType()) {
// For __atomic_*_n operations, the value type must be a scalar integral or
// pointer type which is 1, 2, 4, 8 or 16 bytes in length.

@ -8828,11 +8828,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
});
if (Res.isInvalid()) {
VDecl->setInvalidDecl();
return;
}
if (Res.get() != Args[Idx])
} else if (Res.get() != Args[Idx]) {
Args[Idx] = Res.get();
}
}
if (VDecl->isInvalidDecl())
return;

InitializationSequence InitSeq(*this, Entity, Kind, Args);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Args, &DclT);

@ -13519,49 +13520,6 @@ static void CheckForDuplicateEnumValues(Sema &S, ArrayRef<Decl *> Elements,
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Sema::IsValueInFlagEnum(const EnumDecl *ED, const llvm::APInt &Val,
|
||||
bool AllowMask) const {
|
||||
FlagEnumAttr *FEAttr = ED->getAttr<FlagEnumAttr>();
|
||||
assert(FEAttr && "looking for value in non-flag enum");
|
||||
|
||||
llvm::APInt FlagMask = ~FEAttr->getFlagBits();
|
||||
unsigned Width = FlagMask.getBitWidth();
|
||||
|
||||
// We will try a zero-extended value for the regular check first.
|
||||
llvm::APInt ExtVal = Val.zextOrSelf(Width);
|
||||
|
||||
// A value is in a flag enum if either its bits are a subset of the enum's
|
||||
// flag bits (the first condition) or we are allowing masks and the same is
|
||||
// true of its complement (the second condition). When masks are allowed, we
|
||||
// allow the common idiom of ~(enum1 | enum2) to be a valid enum value.
|
||||
//
|
||||
// While it's true that any value could be used as a mask, the assumption is
|
||||
// that a mask will have all of the insignificant bits set. Anything else is
|
||||
// likely a logic error.
|
||||
if (!(FlagMask & ExtVal))
|
||||
return true;
|
||||
|
||||
if (AllowMask) {
|
||||
// Try a one-extended value instead. This can happen if the enum is wider
|
||||
// than the constant used, in C with extensions to allow for wider enums.
|
||||
// The mask will still have the correct behaviour, so we give the user the
|
||||
// benefit of the doubt.
|
||||
//
|
||||
// FIXME: This heuristic can cause weird results if the enum was extended
|
||||
// to a larger type and is signed, because then bit-masks of smaller types
|
||||
// that get extended will fall out of range (e.g. ~0x1u). We currently don't
|
||||
// detect that case and will get a false positive for it. In most cases,
|
||||
// though, it can be fixed by making it a signed type (e.g. ~0x1), so it may
|
||||
// be fine just to accept this as a warning.
|
||||
ExtVal |= llvm::APInt::getHighBitsSet(Width, Width - Val.getBitWidth());
|
||||
if (!(FlagMask & ~ExtVal))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
|
||||
SourceLocation RBraceLoc, Decl *EnumDeclX,
|
||||
ArrayRef<Decl *> Elements,
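The helper removed above encodes the membership rule for flag-like enums. A hypothetical sketch of what it accepts, using the flag_enum attribute spelling from Clang releases that ship this feature:

    enum __attribute__((flag_enum)) Perm {
      Read  = 0x1,
      Write = 0x2,
      Exec  = 0x4,
    };

    enum Perm rw   = Read | Write;    /* bits are a subset of the flag bits: in */
    enum Perm mask = ~(Read | Exec);  /* complement idiom, AllowMask case: in   */
    enum Perm bad  = 0x10;            /* bit 4 is not a declared flag: not in   */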
@ -13647,8 +13605,10 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
BestPromotionType = Context.getPromotedIntegerType(BestType);
else
BestPromotionType = BestType;

BestWidth = Context.getIntWidth(BestType);
// We don't need to set BestWidth, because BestType is going to be the type
// of the enumerators, but we do anyway because otherwise some compilers
// warn that it might be used uninitialized.
BestWidth = CharWidth;
}
else if (NumNegativeBits) {
// If there is a negative value, figure out the smallest integer type (of
@ -13713,15 +13673,10 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
}
}

FlagEnumAttr *FEAttr = Enum->getAttr<FlagEnumAttr>();
if (FEAttr)
FEAttr->getFlagBits() = llvm::APInt(BestWidth, 0);

// Loop over all of the enumerator constants, changing their types to match
// the type of the enum if needed. If we have a flag type, we also prepare the
// FlagBits cache.
for (auto *D : Elements) {
auto *ECD = cast_or_null<EnumConstantDecl>(D);
// the type of the enum if needed.
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
EnumConstantDecl *ECD = cast_or_null<EnumConstantDecl>(Elements[i]);
if (!ECD) continue; // Already issued a diagnostic.

// Standard C says the enumerators have int type, but we allow, as an
@ -13751,7 +13706,7 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
// enum-specifier, each enumerator has the type of its
// enumeration.
ECD->setType(EnumType);
goto flagbits;
continue;
} else {
NewTy = BestType;
NewWidth = BestWidth;
@ -13778,32 +13733,8 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
ECD->setType(EnumType);
else
ECD->setType(NewTy);

flagbits:
// Check to see if we have a constant with exactly one bit set. Note that x
// & (x - 1) will be nonzero if and only if x has more than one bit set.
if (FEAttr) {
llvm::APInt ExtVal = InitVal.zextOrSelf(BestWidth);
if (ExtVal != 0 && !(ExtVal & (ExtVal - 1))) {
FEAttr->getFlagBits() |= ExtVal;
}
}
}

if (FEAttr) {
for (Decl *D : Elements) {
EnumConstantDecl *ECD = cast_or_null<EnumConstantDecl>(D);
if (!ECD) continue; // Already issued a diagnostic.

llvm::APSInt InitVal = ECD->getInitVal();
if (InitVal != 0 && !IsValueInFlagEnum(Enum, InitVal, true))
Diag(ECD->getLocation(), diag::warn_flag_enum_constant_out_of_range)
<< ECD << Enum;
}
}



Enum->completeDefinition(BestType, BestPromotionType,
NumPositiveBits, NumNegativeBits);
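A hypothetical illustration of the removed FlagBits bookkeeping: only enumerators with exactly one bit set contribute to the flag-bit cache, and every other enumerator is then checked against it, so a value that uses an undeclared bit draws warn_flag_enum_constant_out_of_range.

    enum __attribute__((flag_enum)) Mode {
      ModeA   = 1,
      ModeB   = 2,
      ModeC   = 4,
      ModeAll = ModeA | ModeB | ModeC,  /* subset of the flag bits: OK      */
      ModeOdd = 9,                      /* bit 3 is not a declared flag:
                                           out-of-range enumerator warning */
    };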
@ -4396,9 +4396,6 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_OptimizeNone:
handleOptimizeNoneAttr(S, D, Attr);
break;
case AttributeList::AT_FlagEnum:
handleSimpleAttribute<FlagEnumAttr>(S, D, Attr);
break;
case AttributeList::AT_Flatten:
handleSimpleAttribute<FlattenAttr>(S, D, Attr);
break;
@ -4762,12 +4762,8 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
VK_RValue, RParenLoc);

// Bail out early if calling a builtin with custom typechecking.
if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) {
ExprResult Res = CorrectDelayedTyposInExpr(TheCall);
if (!Res.isUsable() || !isa<CallExpr>(Res.get()))
return Res;
return CheckBuiltinFunctionCall(FDecl, BuiltinID, cast<CallExpr>(Res.get()));
}
if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID))
return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall);

retry:
const FunctionType *FuncT;
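A hypothetical C test case for the early bail-out rewritten above: a builtin with custom type checking (here a C11 atomic builtin) whose argument contains a delayed typo now has the typo resolved, and the corrected call re-checked, before CheckBuiltinFunctionCall runs.

    _Atomic(int) Counter;

    int read_counter(void) {
      /* 'Countr' is a typo; it is resolved before the builtin's custom
         type checking sees the call */
      return __c11_atomic_load(&Countr, __ATOMIC_SEQ_CST);
    }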
@ -5785,15 +5781,6 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
ExprObjectKind &OK,
SourceLocation QuestionLoc) {

if (!getLangOpts().CPlusPlus) {
// C cannot handle TypoExpr nodes on either side of a binop because it
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
ExprResult CondResult = CorrectDelayedTyposInExpr(Cond);
if (!CondResult.isUsable()) return QualType();
Cond = CondResult;
}

ExprResult LHSResult = CheckPlaceholderExpr(LHS.get());
if (!LHSResult.isUsable()) return QualType();
LHS = LHSResult;
@ -6173,6 +6160,15 @@ ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
SourceLocation ColonLoc,
Expr *CondExpr, Expr *LHSExpr,
Expr *RHSExpr) {
if (!getLangOpts().CPlusPlus) {
// C cannot handle TypoExpr nodes in the condition because it
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
if (!CondResult.isUsable()) return ExprError();
CondExpr = CondResult.get();
}

// If this is the gnu "x ?: y" extension, analyze the types as though the LHS
// was the condition.
OpaqueValueExpr *opaqueValue = nullptr;
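A hypothetical C test case for the block added above: a typo in the condition of the conditional operator is resolved up front, because C type checking cannot carry TypoExpr nodes any further.

    int enabled = 1;

    int pick(int a, int b) {
      return enbled ? a : b;   /* 'enbled' is corrected to 'enabled' */
    }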
@ -9457,6 +9453,18 @@ static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R,
}
}

static NamedDecl *getDeclFromExpr(Expr *E) {
if (!E)
return nullptr;
if (auto *DRE = dyn_cast<DeclRefExpr>(E))
return DRE->getDecl();
if (auto *ME = dyn_cast<MemberExpr>(E))
return ME->getMemberDecl();
if (auto *IRE = dyn_cast<ObjCIvarRefExpr>(E))
return IRE->getDecl();
return nullptr;
}

/// CreateBuiltinBinOp - Creates a new built-in binary operation with
/// operator @p Opc at location @c TokLoc. This routine only supports
/// built-in operations; ActOnBinOp handles overloaded operators.
@ -9494,7 +9502,13 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
LHS = CorrectDelayedTyposInExpr(LHSExpr);
RHS = CorrectDelayedTyposInExpr(RHSExpr);
RHS = CorrectDelayedTyposInExpr(RHSExpr, [Opc, LHS](Expr *E) {
if (Opc != BO_Assign)
return ExprResult(E);
// Avoid correcting the RHS to the same Expr as the LHS.
Decl *D = getDeclFromExpr(E);
return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E;
});
if (!LHS.isUsable() || !RHS.isUsable())
return ExprError();
}
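A hypothetical C example of what the new filter prevents: when the right-hand side of an assignment contains a typo, a correction that refers to the same declaration as the left-hand side is rejected, so typo recovery does not fabricate a self-assignment.

    int viable;

    void set(void) {
      viable = viabel;   /* 'viable' itself is not offered as the correction
                            for 'viabel'; another candidate is used, or the
                            usual undeclared-identifier error is emitted */
    }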
@ -6143,12 +6143,6 @@ public:

ExprResult TransformLambdaExpr(LambdaExpr *E) { return Owned(E); }

ExprResult TransformOpaqueValueExpr(OpaqueValueExpr *E) {
if (Expr *SE = E->getSourceExpr())
return TransformExpr(SE);
return BaseTransform::TransformOpaqueValueExpr(E);
}

ExprResult Transform(Expr *E) {
ExprResult Res;
while (true) {
@ -6168,15 +6162,18 @@ public:
while (!AmbiguousTypoExprs.empty()) {
auto TE = AmbiguousTypoExprs.back();
auto Cached = TransformCache[TE];
AmbiguousTypoExprs.pop_back();
auto &State = SemaRef.getTypoExprState(TE);
State.Consumer->saveCurrentPosition();
TransformCache.erase(TE);
if (!TryTransform(E).isInvalid()) {
SemaRef.getTypoExprState(TE).Consumer->resetCorrectionStream();
State.Consumer->resetCorrectionStream();
TransformCache.erase(TE);
Res = ExprError();
break;
} else
TransformCache[TE] = Cached;
}
AmbiguousTypoExprs.remove(TE);
State.Consumer->restoreSavedPosition();
TransformCache[TE] = Cached;
}

// Ensure that all of the TypoExprs within the current Expr have been found.
@ -6235,8 +6232,12 @@ ExprResult Sema::CorrectDelayedTyposInExpr(
if (E && !ExprEvalContexts.empty() && ExprEvalContexts.back().NumTypos &&
(E->isTypeDependent() || E->isValueDependent() ||
E->isInstantiationDependent())) {
auto TyposInContext = ExprEvalContexts.back().NumTypos;
assert(TyposInContext < ~0U && "Recursive call of CorrectDelayedTyposInExpr");
ExprEvalContexts.back().NumTypos = ~0U;
auto TyposResolved = DelayedTypos.size();
auto Result = TransformTypos(*this, Filter).Transform(E);
ExprEvalContexts.back().NumTypos = TyposInContext;
TyposResolved -= DelayedTypos.size();
if (Result.isInvalid() || Result.get() != E) {
ExprEvalContexts.back().NumTypos -= TyposResolved;
@ -3587,7 +3587,7 @@ retry_lookup:
QualifiedResults.push_back(Candidate);
break;
}
Candidate.setCorrectionRange(TempSS, Result.getLookupNameInfo());
Candidate.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
return true;
}
return false;
@ -687,39 +687,26 @@ static void checkCaseValue(Sema &S, SourceLocation Loc, const llvm::APSInt &Val,
}
}

typedef SmallVector<std::pair<llvm::APSInt, EnumConstantDecl*>, 64> EnumValsTy;

/// Returns true if we should emit a diagnostic about this case expression not
/// being a part of the enum used in the switch controlling expression.
static bool ShouldDiagnoseSwitchCaseNotInEnum(const Sema &S,
static bool ShouldDiagnoseSwitchCaseNotInEnum(const ASTContext &Ctx,
const EnumDecl *ED,
const Expr *CaseExpr,
EnumValsTy::iterator &EI,
EnumValsTy::iterator &EIEnd,
const llvm::APSInt &Val) {
bool FlagType = ED->hasAttr<FlagEnumAttr>();

if (const DeclRefExpr *DRE =
dyn_cast<DeclRefExpr>(CaseExpr->IgnoreParenImpCasts())) {
const Expr *CaseExpr) {
// Don't warn if the 'case' expression refers to a static const variable of
// the enum type.
CaseExpr = CaseExpr->IgnoreParenImpCasts();
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(CaseExpr)) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
if (!VD->hasGlobalStorage())
return true;
QualType VarType = VD->getType();
QualType EnumType = S.Context.getTypeDeclType(ED);
if (VD->hasGlobalStorage() && VarType.isConstQualified() &&
S.Context.hasSameUnqualifiedType(EnumType, VarType))
if (!VarType.isConstQualified())
return true;
QualType EnumType = Ctx.getTypeDeclType(ED);
if (Ctx.hasSameUnqualifiedType(EnumType, VarType))
return false;
}
}

if (FlagType) {
return !S.IsValueInFlagEnum(ED, Val, false);
} else {
while (EI != EIEnd && EI->first < Val)
EI++;

if (EI != EIEnd && EI->first == Val)
return false;
}

return true;
}
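A hypothetical C test case for the rewritten helper: a case value that is not an enumerator of the switched-over enum is diagnosed with warn_not_in_enum, while a case naming a const global of the enum type is deliberately left alone.

    enum Color { Red, Green, Blue };

    void paint(enum Color c) {
      switch (c) {
      case Red:
        break;
      case 7:     /* warning: case value not in enumerated type 'enum Color' */
        break;
      }
    }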
@ -1059,6 +1046,8 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch,
// If switch has default case, then ignore it.
if (!CaseListIsErroneous && !HasConstantCond && ET) {
const EnumDecl *ED = ET->getDecl();
typedef SmallVector<std::pair<llvm::APSInt, EnumConstantDecl*>, 64>
EnumValsTy;
EnumValsTy EnumVals;

// Gather all enum values, set their type and sort them,
@ -1069,48 +1058,57 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch,
EnumVals.push_back(std::make_pair(Val, EDI));
}
std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals);
auto EI = EnumVals.begin(), EIEnd =
EnumValsTy::iterator EIend =
std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals);

// See which case values aren't in enum.
EnumValsTy::const_iterator EI = EnumVals.begin();
for (CaseValsTy::const_iterator CI = CaseVals.begin();
CI != CaseVals.end(); CI++) {
Expr *CaseExpr = CI->second->getLHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd,
CI->first))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
CI != CaseVals.end(); CI++) {
while (EI != EIend && EI->first < CI->first)
EI++;
if (EI == EIend || EI->first > CI->first) {
Expr *CaseExpr = CI->second->getLHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(Context, ED, CaseExpr))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
}
}

// See which of case ranges aren't in enum
EI = EnumVals.begin();
for (CaseRangesTy::const_iterator RI = CaseRanges.begin();
RI != CaseRanges.end(); RI++) {
Expr *CaseExpr = RI->second->getLHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd,
RI->first))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
RI != CaseRanges.end() && EI != EIend; RI++) {
while (EI != EIend && EI->first < RI->first)
EI++;

if (EI == EIend || EI->first != RI->first) {
Expr *CaseExpr = RI->second->getLHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(Context, ED, CaseExpr))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
}

llvm::APSInt Hi =
RI->second->getRHS()->EvaluateKnownConstInt(Context);
AdjustAPSInt(Hi, CondWidth, CondIsSigned);

CaseExpr = RI->second->getRHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd,
Hi))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
while (EI != EIend && EI->first < Hi)
EI++;
if (EI == EIend || EI->first != Hi) {
Expr *CaseExpr = RI->second->getRHS();
if (ShouldDiagnoseSwitchCaseNotInEnum(Context, ED, CaseExpr))
Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum)
<< CondTypeBeforePromotion;
}
}

// Check which enum vals aren't in switch
auto CI = CaseVals.begin();
auto RI = CaseRanges.begin();
CaseValsTy::const_iterator CI = CaseVals.begin();
CaseRangesTy::const_iterator RI = CaseRanges.begin();
bool hasCasesNotInSwitch = false;

SmallVector<DeclarationName,8> UnhandledNames;

for (EI = EnumVals.begin(); EI != EIEnd; EI++){
for (EI = EnumVals.begin(); EI != EIend; EI++){
// Drop unneeded case values
while (CI != CaseVals.end() && CI->first < EI->first)
CI++;
@ -1197,37 +1195,30 @@ Sema::DiagnoseAssignmentEnum(QualType DstType, QualType SrcType,
llvm::APSInt RhsVal = SrcExpr->EvaluateKnownConstInt(Context);
AdjustAPSInt(RhsVal, DstWidth, DstIsSigned);
const EnumDecl *ED = ET->getDecl();
typedef SmallVector<std::pair<llvm::APSInt, EnumConstantDecl *>, 64>
EnumValsTy;
EnumValsTy EnumVals;

if (ED->hasAttr<FlagEnumAttr>()) {
if (!IsValueInFlagEnum(ED, RhsVal, true))
Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignment)
// Gather all enum values, set their type and sort them,
// allowing easier comparison with rhs constant.
for (auto *EDI : ED->enumerators()) {
llvm::APSInt Val = EDI->getInitVal();
AdjustAPSInt(Val, DstWidth, DstIsSigned);
EnumVals.push_back(std::make_pair(Val, EDI));
}
if (EnumVals.empty())
return;
std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals);
EnumValsTy::iterator EIend =
std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals);

// See which values aren't in the enum.
EnumValsTy::const_iterator EI = EnumVals.begin();
while (EI != EIend && EI->first < RhsVal)
EI++;
if (EI == EIend || EI->first != RhsVal) {
Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignment)
<< DstType.getUnqualifiedType();
} else {
typedef SmallVector<std::pair<llvm::APSInt, EnumConstantDecl *>, 64>
EnumValsTy;
EnumValsTy EnumVals;

// Gather all enum values, set their type and sort them,
// allowing easier comparison with rhs constant.
for (auto *EDI : ED->enumerators()) {
llvm::APSInt Val = EDI->getInitVal();
AdjustAPSInt(Val, DstWidth, DstIsSigned);
EnumVals.push_back(std::make_pair(Val, EDI));
}
if (EnumVals.empty())
return;
std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals);
EnumValsTy::iterator EIend =
std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals);

// See which values aren't in the enum.
EnumValsTy::const_iterator EI = EnumVals.begin();
while (EI != EIend && EI->first < RhsVal)
EI++;
if (EI == EIend || EI->first != RhsVal) {
Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignment)
<< DstType.getUnqualifiedType();
}
}
}
}
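A hypothetical C test case for DiagnoseAssignmentEnum as restructured above (the warning is under -Wassign-enum and is off by default):

    enum Size { Small = 1, Medium = 2, Large = 3 };

    void resize(enum Size s) {
      s = 5;        /* integer constant not in range of enumerated type */
      s = Medium;   /* fine */
      (void)s;
    }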
@ -230,9 +230,8 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
llvm::APSInt Result;
if (!InputExpr->EvaluateAsInt(Result, Context))
return StmtError(
Diag(InputExpr->getLocStart(), diag::err_asm_invalid_type_in_input)
<< InputExpr->getType() << Info.getConstraintStr()
<< InputExpr->getSourceRange());
Diag(InputExpr->getLocStart(), diag::err_asm_immediate_expected)
<< Info.getConstraintStr() << InputExpr->getSourceRange());
if (Result.slt(Info.getImmConstantMin()) ||
Result.sgt(Info.getImmConstantMax()))
return StmtError(Diag(InputExpr->getLocStart(),
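A hypothetical test case for the new diagnostic: on a target whose "I" constraint demands an immediate, passing a runtime value now produces the dedicated err_asm_immediate_expected message rather than the generic invalid-type-in-input error.

    void spin(int n) {
      /* 'n' is not an integer constant expression, so the constraint
         cannot be satisfied */
      __asm__ volatile("" : : "I"(n));
    }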
@ -78,7 +78,9 @@ ArgumentsAdjuster getInsertArgumentAdjuster(const char *Extra,

ArgumentsAdjuster combineAdjusters(ArgumentsAdjuster First,
ArgumentsAdjuster Second) {
return std::bind(Second, std::bind(First, std::placeholders::_1));
return [First, Second](const CommandLineArguments &Args) {
return Second(First(Args));
};
}

} // end namespace tooling
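A hypothetical usage sketch of the lambda-based combineAdjusters above: two small adjusters written as plain lambdas (possible because ArgumentsAdjuster is a std::function over CommandLineArguments) composed so the first one's output feeds the second.

    #include "clang/Tooling/ArgumentsAdjusters.h"

    using namespace clang::tooling;

    ArgumentsAdjuster makeAdjuster() {
      // Append a macro definition to the compile command.
      ArgumentsAdjuster addDefine = [](const CommandLineArguments &Args) {
        CommandLineArguments Adjusted = Args;
        Adjusted.push_back("-DNDEBUG");
        return Adjusted;
      };
      // Then ask for syntax-only checking.
      ArgumentsAdjuster addSyntaxOnly = [](const CommandLineArguments &Args) {
        CommandLineArguments Adjusted = Args;
        Adjusted.push_back("-fsyntax-only");
        return Adjusted;
      };
      // The combined adjuster applies addDefine first, then addSyntaxOnly.
      return combineAdjusters(addDefine, addSyntaxOnly);
    }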
@ -2185,8 +2185,7 @@ static std::string CalculateDiagnostic(const Record &S) {
Namespace = 1U << 11,
Field = 1U << 12,
CXXMethod = 1U << 13,
ObjCProtocol = 1U << 14,
Enum = 1U << 15
ObjCProtocol = 1U << 14
};
uint32_t SubMask = 0;

@ -2220,7 +2219,6 @@ static std::string CalculateDiagnostic(const Record &S) {
.Case("Namespace", Namespace)
.Case("Field", Field)
.Case("CXXMethod", CXXMethod)
.Case("Enum", Enum)
.Default(0);
if (!V) {
// Something wasn't in our mapping, so be helpful and let the developer
@ -2239,7 +2237,6 @@ static std::string CalculateDiagnostic(const Record &S) {
case Var: return "ExpectedVariable";
case Param: return "ExpectedParameter";
case Class: return "ExpectedClass";
case Enum: return "ExpectedEnum";
case CXXMethod:
// FIXME: Currently, this maps to ExpectedMethod based on existing code,
// but should map to something a bit more accurate at some point.
@ -2393,8 +2390,6 @@ static std::string GenerateLangOptRequirements(const Record &R,
std::string FnName = "check", Test;
for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I) {
std::string Part = (*I)->getValueAsString("Name");
if ((*I)->getValueAsBit("Negated"))
Test += "!";
Test += "S.LangOpts." + Part;
if (I + 1 != E)
Test += " || ";
Some files were not shown because too many files have changed in this diff.