Vendor import of llvm trunk r291274:

https://llvm.org/svn/llvm-project/llvm/trunk@291274
Dimitry Andric 2017-01-06 20:13:21 +00:00
parent 4b570baa7e
commit 7e7b670074
235 changed files with 14812 additions and 6276 deletions

View File

@ -167,7 +167,10 @@ check_symbol_exists(futimens sys/stat.h HAVE_FUTIMENS)
check_symbol_exists(futimes sys/time.h HAVE_FUTIMES)
check_symbol_exists(posix_fallocate fcntl.h HAVE_POSIX_FALLOCATE)
# AddressSanitizer conflicts with lib/Support/Unix/Signals.inc
if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*")
# Avoid sigaltstack on Apple platforms, where backtrace() cannot handle it
# (rdar://7089625) and _Unwind_Backtrace is unusable because it cannot unwind
# past the signal handler after an assertion failure (rdar://29866587).
if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*" AND NOT APPLE )
check_symbol_exists(sigaltstack signal.h HAVE_SIGALTSTACK)
endif()
if( HAVE_SYS_UIO_H )
@ -314,6 +317,8 @@ else()
endif()
check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
check_cxx_compiler_flag("-Wno-gnu-zero-variadic-macro-arguments"
SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
set(USE_NO_MAYBE_UNINITIALIZED 0)
set(USE_NO_UNINITIALIZED 0)

View File

@ -1014,6 +1014,10 @@ function(add_unittest test_suite test_name)
if (SUPPORTS_NO_VARIADIC_MACROS_FLAG)
list(APPEND LLVM_COMPILE_FLAGS "-Wno-variadic-macros")
endif ()
# Some parts of gtest rely on this GNU extension, don't warn on it.
if(SUPPORTS_NO_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
list(APPEND LLVM_COMPILE_FLAGS "-Wno-gnu-zero-variadic-macro-arguments")
endif()
set(LLVM_REQUIRES_RTTI OFF)

View File

@ -35,8 +35,8 @@ by many Linux package managers; you probably need to install nvidia's package.
You will need CUDA 7.0, 7.5, or 8.0 to compile with clang.
CUDA compilation is supported on Linux, and on MacOS as of XXXX-XX-XX. Windows
support is planned but not yet in place.
CUDA compilation is supported on Linux, on MacOS as of 2016-11-18, and on
Windows as of 2017-01-05.
Invoking clang
--------------

View File

@ -132,7 +132,7 @@ committed to trunk. If you do not have commit access, someone has to
commit the change for you (with attribution). It is sufficient to add
a comment to the approved review indicating you cannot commit the patch
yourself. If you have commit access, there are multiple workflows to commit the
change. Whichever method you follow it is recommend that your commit message
change. Whichever method you follow it is recommended that your commit message
ends with the line:
::

View File

@ -128,7 +128,7 @@ extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
/// \brief The CGSCC pass manager.
///
/// See the documentation for the PassManager template for details. It runs
/// a sequency of SCC passes over each SCC that the manager is run over. This
/// a sequence of SCC passes over each SCC that the manager is run over. This
/// typedef serves as a convenient way to refer to this construct.
typedef PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
CGSCCUpdateResult &>

View File

@ -36,6 +36,8 @@ namespace llvm {
class Function;
class GlobalValue;
class Loop;
class ScalarEvolution;
class SCEV;
class Type;
class User;
class Value;
@ -613,10 +615,11 @@ public:
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
/// The 'IsComplex' parameter is a hint that the address computation is likely
/// to involve multiple instructions and as such unlikely to be merged into
/// the address indexing mode.
int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
/// The 'SE' parameter holds pointer for the scalar evolution object which
/// is used in order to get the Ptr step value in case of constant stride.
/// The 'Ptr' parameter holds SCEV of the access pointer.
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
const SCEV *Ptr = nullptr) const;
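
A minimal sketch of how a caller might use the new overload, assuming TTI, SE, and a GEP instruction are already in scope:

const SCEV *PtrSCEV = SE->getSCEV(GEP);
// Passing the pointer's SCEV lets the target recognize a small constant
// stride and price the address computation accordingly.
int Cost = TTI.getAddressComputationCost(GEP->getType(), SE, PtrSCEV);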
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
@ -795,7 +798,8 @@ public:
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) = 0;
virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
@ -1044,8 +1048,9 @@ public:
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
int getAddressComputationCost(Type *Ty, bool IsComplex) override {
return Impl.getAddressComputationCost(Ty, IsComplex);
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) override {
return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);

View File

@ -15,6 +15,7 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@ -370,7 +371,10 @@ public:
unsigned getNumberOfParts(Type *Tp) { return 0; }
unsigned getAddressComputationCost(Type *Tp, bool) { return 0; }
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
const SCEV *) {
return 0;
}
unsigned getReductionCost(unsigned, Type *, bool) { return 1; }
@ -422,6 +426,30 @@ public:
VectorType *VecTy) const {
return VF;
}
protected:
bool isStridedAccess(const SCEV *Ptr) {
return Ptr && isa<SCEVAddRecExpr>(Ptr);
}
const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
const SCEV *Ptr) {
if (!isStridedAccess(Ptr))
return nullptr;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
}
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
int64_t MergeDistance) {
const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
if (!Step)
return false;
APInt StrideVal = Step->getAPInt();
if (StrideVal.getBitWidth() > 64)
return false;
// FIXME: need to take absolute value for negative stride case
return StrideVal.getSExtValue() < MergeDistance;
}
};
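
A sketch of how a hypothetical target implementation (MyTTIImpl is made up) might build on these protected helpers:

int MyTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                         const SCEV *Ptr) {
  // Accesses with a small constant stride are likely to fold into the
  // addressing mode, so treat them as free; anything else is "complex".
  if (Ptr && isConstantStridedAccessLessThan(SE, Ptr, /*MergeDistance=*/64))
    return 0;
  return 1;
}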
/// \brief CRTP base class for use as a mix-in that aids implementing

View File

@ -18,7 +18,6 @@
#ifndef LLVM_BITCODE_BITCODES_H
#define LLVM_BITCODE_BITCODES_H
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@ -166,11 +165,8 @@ template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
/// BitCodeAbbrev - This class represents an abbreviation record. An
/// abbreviation allows a complex record that has redundancy to be stored in a
/// specialized format instead of the fully-general, fully-vbr, format.
class BitCodeAbbrev : public RefCountedBase<BitCodeAbbrev> {
class BitCodeAbbrev {
SmallVector<BitCodeAbbrevOp, 32> OperandList;
// Only RefCountedBase is allowed to delete.
~BitCodeAbbrev() = default;
friend class RefCountedBase<BitCodeAbbrev>;
public:
unsigned getNumOperandInfos() const {

View File

@ -16,7 +16,6 @@
#define LLVM_BITCODE_BITSTREAMREADER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Support/Endian.h"
@ -42,7 +41,7 @@ public:
/// describe abbreviations that all blocks of the specified ID inherit.
struct BlockInfo {
unsigned BlockID;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
std::string Name;
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
@ -316,11 +315,11 @@ class BitstreamCursor : SimpleBitstreamCursor {
unsigned CurCodeSize = 2;
/// Abbrevs installed at in this block.
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
struct Block {
unsigned PrevCodeSize;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
@ -478,8 +477,8 @@ public:
return CurAbbrevs[AbbrevNo].get();
}
/// Read the current record and discard it.
void skipRecord(unsigned AbbrevID);
/// Read the current record and discard it, returning the code for the record.
unsigned skipRecord(unsigned AbbrevID);
unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
StringRef *Blob = nullptr);

View File

@ -43,12 +43,12 @@ class BitstreamWriter {
unsigned BlockInfoCurBID;
/// CurAbbrevs - Abbrevs installed at in this block.
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
struct Block {
unsigned PrevCodeSize;
size_t StartSizeWord;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
};
@ -59,7 +59,7 @@ class BitstreamWriter {
/// These describe abbreviations that all blocks of the specified ID inherit.
struct BlockInfo {
unsigned BlockID;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
};
std::vector<BlockInfo> BlockInfoRecords;
@ -469,12 +469,12 @@ public:
private:
// Emit the abbreviation as a DEFINE_ABBREV record.
void EncodeAbbrev(BitCodeAbbrev *Abbv) {
void EncodeAbbrev(const BitCodeAbbrev &Abbv) {
EmitCode(bitc::DEFINE_ABBREV);
EmitVBR(Abbv->getNumOperandInfos(), 5);
for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
EmitVBR(Abbv.getNumOperandInfos(), 5);
for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos());
i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i);
Emit(Op.isLiteral(), 1);
if (Op.isLiteral()) {
EmitVBR64(Op.getLiteralValue(), 8);
@ -489,10 +489,10 @@ public:
/// EmitAbbrev - This emits an abbreviation to the stream. Note that this
/// method takes ownership of the specified abbrev.
unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
// Emit the abbreviation as a record.
EncodeAbbrev(Abbv);
CurAbbrevs.push_back(Abbv);
EncodeAbbrev(*Abbv);
CurAbbrevs.push_back(std::move(Abbv));
return static_cast<unsigned>(CurAbbrevs.size())-1 +
bitc::FIRST_APPLICATION_ABBREV;
}
@ -532,13 +532,13 @@ public:
/// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
/// BlockID.
unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
unsigned EmitBlockInfoAbbrev(unsigned BlockID, std::shared_ptr<BitCodeAbbrev> Abbv) {
SwitchToBlockID(BlockID);
EncodeAbbrev(Abbv);
EncodeAbbrev(*Abbv);
// Add the abbrev to the specified block record.
BlockInfo &Info = getOrCreateBlockInfo(BlockID);
Info.Abbrevs.push_back(Abbv);
Info.Abbrevs.push_back(std::move(Abbv));
return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
}
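
Under the new ownership model, callers allocate abbreviations with std::make_shared and hand them off; a minimal sketch, where Stream is an in-scope BitstreamWriter and the operand list is illustrative:

auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));        // literal record code
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));     // VBR-8 operand
unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbv));  // Stream keeps a ref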

View File

@ -140,6 +140,9 @@ private:
/// If the target supports dwarf debug info, this pointer is non-null.
DwarfDebug *DD;
/// If the current module uses dwarf CFI annotations strictly for debugging.
bool isCFIMoveForDebugging;
protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
@ -262,6 +265,10 @@ public:
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
CFIMoveType needsCFIMoves();
/// Returns false if needsCFIMoves() == CFI_M_EH for any function
/// in the module.
bool needsOnlyDebugCFIMoves() const { return isCFIMoveForDebugging; }
bool needsSEHMoves();
/// Print to the current output stream assembly representations of the

View File

@ -925,7 +925,10 @@ public:
return LT.first;
}
unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
const SCEV *) {
return 0;
}
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");

View File

@ -651,6 +651,9 @@ class DIE : IntrusiveBackListNode, public DIEValueList {
unsigned AbbrevNumber = ~0u;
/// Dwarf tag code.
dwarf::Tag Tag = (dwarf::Tag)0;
/// Set to true to force a DIE to emit an abbreviation that says it has
/// children even when it doesn't. This is used for unit testing purposes.
bool ForceChildren;
/// Children DIEs.
IntrusiveBackList<DIE> Children;
@ -659,7 +662,8 @@ class DIE : IntrusiveBackListNode, public DIEValueList {
PointerUnion<DIE *, DIEUnit *> Owner;
DIE() = delete;
explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {}
explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag),
ForceChildren(false) {}
public:
static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) {
@ -677,7 +681,8 @@ public:
/// Get the compile/type unit relative offset of this DIE.
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
bool hasChildren() const { return !Children.empty(); }
bool hasChildren() const { return ForceChildren || !Children.empty(); }
void setForceChildren(bool B) { ForceChildren = B; }
typedef IntrusiveBackList<DIE>::iterator child_iterator;
typedef IntrusiveBackList<DIE>::const_iterator const_child_iterator;
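
A unit-test style sketch of the new flag, assuming Alloc is an in-scope BumpPtrAllocator:

DIE *D = DIE::get(Alloc, dwarf::DW_TAG_compile_unit);
D->setForceChildren(true);
// hasChildren() now reports true even though no child was ever added.
assert(D->hasChildren());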

View File

@ -180,6 +180,8 @@ private:
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder);
bool translateExtractValue(const User &U, MachineIRBuilder &MIRBuilder);
bool translateInsertValue(const User &U, MachineIRBuilder &MIRBuilder);
@ -292,12 +294,8 @@ private:
return translateBinaryOp(TargetOpcode::G_FREM, U, MIRBuilder);
}
// Stubs to keep the compiler happy while we implement the rest of the
// translation.
bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
return false;
}
bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder) {
return false;
}

View File

@ -308,7 +308,7 @@ public:
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
typedef LiveInVector::const_iterator livein_iterator;
livein_iterator livein_begin() const { return LiveIns.begin(); }
livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
iterator_range<livein_iterator> liveins() const {

View File

@ -10,6 +10,8 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFDIE_H
#define LLVM_LIB_DEBUGINFO_DWARFDIE_H
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
@ -40,9 +42,6 @@ public:
bool isValid() const { return U && Die; }
explicit operator bool() const { return isValid(); }
bool operator ==(const DWARFDie &RHS) const {
return Die == RHS.Die && U == RHS.U;
}
const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; }
DWARFUnit *getDwarfUnit() const { return U; }
@ -361,8 +360,61 @@ public:
getInlinedChainForAddress(const uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain) const;
class iterator;
iterator begin() const;
iterator end() const;
iterator_range<iterator> children() const;
};
inline bool operator==(const DWARFDie &LHS, const DWARFDie &RHS) {
return LHS.getDebugInfoEntry() == RHS.getDebugInfoEntry() &&
LHS.getDwarfUnit() == RHS.getDwarfUnit();
}
inline bool operator!=(const DWARFDie &LHS, const DWARFDie &RHS) {
return !(LHS == RHS);
}
class DWARFDie::iterator : public iterator_facade_base<iterator,
std::forward_iterator_tag,
const DWARFDie> {
DWARFDie Die;
void skipNull() {
if (Die && Die.isNULL())
Die = DWARFDie();
}
public:
iterator() = default;
explicit iterator(DWARFDie D) : Die(D) {
// If we start out with only a Null DIE then invalidate.
skipNull();
}
iterator &operator++() {
Die = Die.getSibling();
// Don't include the NULL die when iterating.
skipNull();
return *this;
}
explicit operator bool() const { return Die.isValid(); }
const DWARFDie &operator*() const { return Die; }
bool operator==(const iterator &X) const { return Die == X.Die; }
};
// These inline functions must follow the DWARFDie::iterator definition above
// as they use functions from that class.
inline DWARFDie::iterator DWARFDie::begin() const {
return iterator(getFirstChild());
}
inline DWARFDie::iterator DWARFDie::end() const {
return iterator();
}
inline iterator_range<DWARFDie::iterator> DWARFDie::children() const {
return make_range(begin(), end());
}
} // end namespace llvm
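
With the iterator support above, child DIEs can be walked with a range-based for loop; a small sketch, assuming Die is a valid DWARFDie:

unsigned NumSubprograms = 0;
for (DWARFDie Child : Die.children())
  // NULL terminator DIEs are skipped by the iterator itself.
  if (Child.getTag() == dwarf::DW_TAG_subprogram)
    ++NumSubprograms;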

View File

@ -47,9 +47,9 @@ public:
/// Locks the channel for writing.
template <typename FunctionIdT, typename SequenceIdT>
Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
writeLock.lock();
if (auto Err = serializeSeq(*this, FnId, SeqNo))
return Err;
writeLock.lock();
return Error::success();
}

View File

@ -28,6 +28,10 @@
namespace llvm {
namespace yaml {
template <typename T> struct MappingTraits;
}
/// \brief Class to accumulate and hold information about a callee.
struct CalleeInfo {
enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 };
@ -102,7 +106,7 @@ public:
/// \brief Subclass discriminator (for dyn_cast<> et al.)
enum SummaryKind : unsigned { AliasKind, FunctionKind, GlobalVarKind };
/// Group flags (Linkage, noRename, isOptSize, etc.) as a bitfield.
/// Group flags (Linkage, NotEligibleToImport, etc.) as a bitfield.
struct GVFlags {
/// \brief The linkage type of the associated global value.
///
@ -113,39 +117,20 @@ public:
/// types based on global summary-based analysis.
unsigned Linkage : 4;
/// Indicate if the global value cannot be renamed (in a specific section,
/// possibly referenced from inline assembly, etc).
unsigned NoRename : 1;
/// Indicate if the global value cannot be imported (e.g. it cannot
/// be renamed or references something that can't be renamed).
unsigned NotEligibleToImport : 1;
/// Indicate if a function contains inline assembly (which is opaque),
/// that may reference a local value. This is used to prevent importing
/// of this function, since we can't promote and rename the uses of the
/// local in the inline assembly. Use a flag rather than bloating the
/// summary with references to every possible local value in the
/// llvm.used set.
unsigned HasInlineAsmMaybeReferencingInternal : 1;
/// Indicate if the function is not viable to inline.
unsigned IsNotViableToInline : 1;
/// Indicate that the global value must be considered a live root for
/// index-based liveness analysis. Used for special LLVM values such as
/// llvm.global_ctors that the linker does not know about.
unsigned LiveRoot : 1;
/// Convenience Constructors
explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool NoRename,
bool HasInlineAsmMaybeReferencingInternal,
bool IsNotViableToInline)
: Linkage(Linkage), NoRename(NoRename),
HasInlineAsmMaybeReferencingInternal(
HasInlineAsmMaybeReferencingInternal),
IsNotViableToInline(IsNotViableToInline) {}
GVFlags(const GlobalValue &GV)
: Linkage(GV.getLinkage()), NoRename(GV.hasSection()),
HasInlineAsmMaybeReferencingInternal(false) {
IsNotViableToInline = false;
if (const auto *F = dyn_cast<Function>(&GV))
// Inliner doesn't handle variadic functions.
// FIXME: refactor this to use the same code that the inliner is using.
IsNotViableToInline = F->isVarArg();
}
explicit GVFlags(GlobalValue::LinkageTypes Linkage,
bool NotEligibleToImport, bool LiveRoot)
: Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
LiveRoot(LiveRoot) {}
};
private:
@ -213,31 +198,19 @@ public:
Flags.Linkage = Linkage;
}
bool isNotViableToInline() const { return Flags.IsNotViableToInline; }
/// Return true if this global value can't be imported.
bool notEligibleToImport() const { return Flags.NotEligibleToImport; }
/// Return true if this summary is for a GlobalValue that needs promotion
/// to be referenced from another module.
bool needsRenaming() const { return GlobalValue::isLocalLinkage(linkage()); }
/// Return true if this global value must be considered a root for live
/// value analysis on the index.
bool liveRoot() const { return Flags.LiveRoot; }
/// Return true if this global value cannot be renamed (in a specific section,
/// possibly referenced from inline assembly, etc).
bool noRename() const { return Flags.NoRename; }
/// Flag that this global value must be considered a root for live
/// value analysis on the index.
void setLiveRoot() { Flags.LiveRoot = true; }
/// Flag that this global value cannot be renamed (in a specific section,
/// possibly referenced from inline assembly, etc).
void setNoRename() { Flags.NoRename = true; }
/// Return true if this global value possibly references another value
/// that can't be renamed.
bool hasInlineAsmMaybeReferencingInternal() const {
return Flags.HasInlineAsmMaybeReferencingInternal;
}
/// Flag that this global value possibly references another value that
/// can't be renamed.
void setHasInlineAsmMaybeReferencingInternal() {
Flags.HasInlineAsmMaybeReferencingInternal = true;
}
/// Flag that this global value cannot be imported.
void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
/// Return the list of values referenced by this global value definition.
ArrayRef<ValueInfo> refs() const { return RefEdgeList; }
@ -330,6 +303,30 @@ public:
}
};
struct TypeTestResolution {
/// Specifies which kind of type check we should emit for this byte array.
/// See http://clang.llvm.org/docs/ControlFlowIntegrityDesign.html for full
/// details on each kind of check; the enumerators are described with
/// reference to that document.
enum Kind {
Unsat, ///< Unsatisfiable type (i.e. no global has this type metadata)
ByteArray, ///< Test a byte array (first example)
Inline, ///< Inlined bit vector ("Short Inline Bit Vectors")
Single, ///< Single element (last example in "Short Inline Bit Vectors")
AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for
/// All-Ones Bit Vectors")
} TheKind = Unsat;
/// Range of the size expressed as a bit width. For example, if the size is in
/// range [0,256), this number will be 8. This helps generate the most compact
/// instruction sequences.
unsigned SizeBitWidth = 0;
};
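
For example, filling in a resolution for a type whose byte-array size is known to fit in 8 bits (the values here are illustrative):

TypeTestResolution Res;
Res.TheKind = TypeTestResolution::ByteArray;
Res.SizeBitWidth = 8; // size is in [0, 256)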
struct TypeIdSummary {
TypeTestResolution TTRes;
};
/// 160-bit SHA1
typedef std::array<uint32_t, 5> ModuleHash;
@ -370,11 +367,20 @@ private:
/// Holds strings for combined index, mapping to the corresponding module ID.
ModulePathStringTableTy ModulePathStringTable;
/// Mapping from type identifiers to summary information for that type
/// identifier.
// FIXME: Add bitcode read/write support for this field.
std::map<std::string, TypeIdSummary> TypeIdMap;
// YAML I/O support.
friend yaml::MappingTraits<ModuleSummaryIndex>;
public:
gvsummary_iterator begin() { return GlobalValueMap.begin(); }
const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); }
gvsummary_iterator end() { return GlobalValueMap.end(); }
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
size_t size() const { return GlobalValueMap.size(); }
/// Get the list of global value summary objects for a given value name.
const GlobalValueSummaryList &getGlobalValueSummaryList(StringRef ValueName) {

View File

@ -0,0 +1,111 @@
//===-- llvm/ModuleSummaryIndexYAML.h - YAML I/O for summary ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_MODULESUMMARYINDEXYAML_H
#define LLVM_IR_MODULESUMMARYINDEXYAML_H
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/YAMLTraits.h"
namespace llvm {
namespace yaml {
template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {
static void enumeration(IO &io, TypeTestResolution::Kind &value) {
io.enumCase(value, "Unsat", TypeTestResolution::Unsat);
io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray);
io.enumCase(value, "Inline", TypeTestResolution::Inline);
io.enumCase(value, "Single", TypeTestResolution::Single);
io.enumCase(value, "AllOnes", TypeTestResolution::AllOnes);
}
};
template <> struct MappingTraits<TypeTestResolution> {
static void mapping(IO &io, TypeTestResolution &res) {
io.mapRequired("Kind", res.TheKind);
io.mapRequired("SizeBitWidth", res.SizeBitWidth);
}
};
template <> struct MappingTraits<TypeIdSummary> {
static void mapping(IO &io, TypeIdSummary& summary) {
io.mapRequired("TTRes", summary.TTRes);
}
};
struct FunctionSummaryYaml {
std::vector<uint64_t> TypeTests;
};
} // End yaml namespace
} // End llvm namespace
LLVM_YAML_IS_SEQUENCE_VECTOR(uint64_t)
namespace llvm {
namespace yaml {
template <> struct MappingTraits<FunctionSummaryYaml> {
static void mapping(IO &io, FunctionSummaryYaml& summary) {
io.mapRequired("TypeTests", summary.TypeTests);
}
};
} // End yaml namespace
} // End llvm namespace
LLVM_YAML_IS_STRING_MAP(TypeIdSummary)
LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml)
namespace llvm {
namespace yaml {
// FIXME: Add YAML mappings for the rest of the module summary.
template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) {
std::vector<FunctionSummaryYaml> FSums;
io.mapRequired(Key.str().c_str(), FSums);
uint64_t KeyInt;
if (Key.getAsInteger(0, KeyInt)) {
io.setError("key not an integer");
return;
}
auto &Elem = V[KeyInt];
for (auto &FSum : FSums) {
GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false,
false);
Elem.push_back(llvm::make_unique<FunctionSummary>(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests)));
}
}
static void output(IO &io, GlobalValueSummaryMapTy &V) {
for (auto &P : V) {
std::vector<FunctionSummaryYaml> FSums;
for (auto &Sum : P.second) {
if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get()))
FSums.push_back(FunctionSummaryYaml{FSum->type_tests()});
}
if (!FSums.empty())
io.mapRequired(llvm::utostr(P.first).c_str(), FSums);
}
}
};
template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapRequired("GlobalValueMap", index.GlobalValueMap);
io.mapRequired("TypeIdMap", index.TypeIdMap);
}
};
} // End yaml namespace
} // End llvm namespace
#endif
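
A minimal round-trip sketch using the mappings above, assuming Buffer is a StringRef holding a YAML document:

ModuleSummaryIndex Index;
yaml::Input Yin(Buffer);
Yin >> Index;                    // parse GlobalValueMap and TypeIdMap
if (!Yin.error()) {
  yaml::Output Yout(llvm::outs());
  Yout << Index;                 // re-serialize, e.g. for a test
}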

View File

@ -64,32 +64,31 @@ namespace llvm {
struct alignas(8) AnalysisKey {};
/// A special type used to provide an address that identifies a set of related
/// analyses.
/// analyses. These sets are primarily used below to mark sets of analyses as
/// preserved.
///
/// These sets are primarily used below to mark sets of analyses as preserved.
/// An example would be analyses depending only on the CFG of a function.
/// A transformation can mark that it is preserving the CFG of a function and
/// then analyses can check for this rather than each transform having to fully
/// enumerate every analysis preserved.
/// For example, a transformation can indicate that it preserves the CFG of a
/// function by preserving the appropriate AnalysisSetKey. An analysis that
/// depends only on the CFG can then check if that AnalysisSetKey is preserved;
/// if it is, the analysis knows that it itself is preserved.
struct alignas(8) AnalysisSetKey {};
/// Class for tracking what analyses are preserved after a transformation pass
/// runs over some unit of IR.
/// A set of analyses that are preserved following a run of a transformation
/// pass.
///
/// Transformation passes build and return these objects when run over the IR
/// to communicate which analyses remain valid afterward. For most passes this
/// is fairly simple: if they don't change anything all analyses are preserved,
/// Transformation passes build and return these objects to communicate which
/// analyses are still valid after the transformation. For most passes this is
/// fairly simple: if they don't change anything all analyses are preserved,
/// otherwise only a short list of analyses that have been explicitly updated
/// are preserved.
///
/// This class also provides the ability to mark abstract *sets* of analyses as
/// preserved. These sets allow passes to indicate that they preserve broad
/// aspects of the IR (such as its CFG) and analyses to opt in to that being
/// sufficient without the passes having to fully enumerate such analyses.
/// This class also lets transformation passes mark abstract *sets* of analyses
/// as preserved. A transformation that (say) does not alter the CFG can
/// indicate such by marking a particular AnalysisSetKey as preserved, and
/// then analyses can query whether that AnalysisSetKey is preserved.
///
/// Finally, this class can represent "abandoning" an analysis, which marks it
/// as not-preserved even if it would be covered by some abstract set of
/// analyses.
/// Finally, this class can represent an "abandoned" analysis, which is
/// not preserved even if it would be covered by some abstract set of analyses.
///
/// Given a `PreservedAnalyses` object, an analysis will typically want to
/// figure out whether it is preserved. In the example below, MyAnalysisType is
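
A minimal sketch of the check being described, with MyAnalysisType standing in for a hypothetical analysis over functions:

auto PAC = PA.getChecker<MyAnalysisType>();
bool StillValid =
    PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>();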
@ -120,7 +119,8 @@ public:
/// Mark an analysis as preserved.
template <typename AnalysisT> void preserve() { preserve(AnalysisT::ID()); }
/// Mark an analysis as preserved using its ID.
/// \brief Given an analysis's ID, mark the analysis as preserved, adding it
/// to the set.
void preserve(AnalysisKey *ID) {
// Clear this ID from the explicit not-preserved set if present.
NotPreservedAnalysisIDs.erase(ID);
@ -224,17 +224,17 @@ public:
: PA(PA), ID(ID), IsAbandoned(PA.NotPreservedAnalysisIDs.count(ID)) {}
public:
/// Returns true if the checker's analysis was not abandoned and the
/// analysis is either is explicitly preserved or all analyses are
/// preserved.
/// Returns true if the checker's analysis was not abandoned and either
/// - the analysis is explicitly preserved or
/// - all analyses are preserved.
bool preserved() {
return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
PA.PreservedIDs.count(ID));
}
/// Returns true if the checker's analysis was not abandoned and either the
/// provided set type is either explicitly preserved or all analyses are
/// preserved.
/// Returns true if the checker's analysis was not abandoned and either
/// - \p AnalysisSetT is explicitly preserved or
/// - all analyses are preserved.
template <typename AnalysisSetT> bool preservedSet() {
AnalysisSetKey *SetID = AnalysisSetT::ID();
return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
@ -262,8 +262,8 @@ public:
/// Test whether all analyses are preserved (and none are abandoned).
///
/// This lets analyses optimize for the common case where a transformation
/// made no changes to the IR.
/// This is used primarily to optimize for the common case of a transformation
/// which makes no changes to the IR.
bool areAllPreserved() const {
return NotPreservedAnalysisIDs.empty() &&
PreservedIDs.count(&AllAnalysesKey);
@ -307,9 +307,9 @@ template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
/// A CRTP mix-in to automatically provide informational APIs needed for
/// passes.
///
/// This provides some boiler plate for types that are passes.
/// This provides some boilerplate for types that are passes.
template <typename DerivedT> struct PassInfoMixin {
/// Returns the name of the derived pass type.
/// Gets the name of the pass we are mixed into.
static StringRef name() {
StringRef Name = getTypeName<DerivedT>();
if (Name.startswith("llvm::"))
@ -318,41 +318,35 @@ template <typename DerivedT> struct PassInfoMixin {
}
};
/// A CRTP mix-in to automatically provide informational APIs needed for
/// analysis passes.
/// A CRTP mix-in that provides informational APIs needed for analysis passes.
///
/// This provides some boiler plate for types that are analysis passes. It
/// automatically mixes in \c PassInfoMixin and adds informational APIs
/// specifically used for analyses.
/// This provides some boilerplate for types that are analysis passes. It
/// automatically mixes in \c PassInfoMixin.
template <typename DerivedT>
struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
/// Returns an opaque, unique ID for this analysis type.
///
/// This ID is a pointer type that is guaranteed to be 8-byte aligned and
/// thus suitable for use in sets, maps, and other data structures optimized
/// for pointer-like types using the alignment-provided low bits.
/// This ID is a pointer type that is guaranteed to be 8-byte aligned and thus
/// suitable for use in sets, maps, and other data structures that use the low
/// bits of pointers.
///
/// Note that this requires the derived type provide a static \c AnalysisKey
/// member called \c Key.
///
/// FIXME: The only reason the derived type needs to provide this rather than
/// this mixin providing it is due to broken implementations which cannot
/// correctly unique a templated static so that they have the same addresses
/// for each instantiation and are definitively emitted once for each
/// instantiation. The only currently known platform with this limitation are
/// Windows DLL builds, specifically building each part of LLVM as a DLL. If
/// we ever remove that build configuration, this mixin can provide the
/// static key as well.
/// FIXME: The only reason the mixin type itself can't declare the Key value
/// is that some compilers cannot correctly unique a templated static variable
/// so it has the same addresses in each instantiation. The only currently
/// known platform with this limitation is Windows DLL builds, specifically
/// building each part of LLVM as a DLL. If we ever remove that build
/// configuration, this mixin can provide the static key as well.
static AnalysisKey *ID() { return &DerivedT::Key; }
};
/// A class template to provide analysis sets for IR units.
/// This templated class represents "all analyses that operate over \<a
/// particular IR unit\>" (e.g. a Function or a Module) in instances of
/// PreservedAnalysis.
///
/// Analyses operate on units of IR. It is useful to be able to talk about
/// preservation of all analyses for a given unit of IR as a set. This class
/// template can be used with the \c PreservedAnalyses API for that purpose and
/// the \c AnalysisManager will automatically check and use this set to skip
/// invalidation events.
/// This lets a transformation say e.g. "I preserved all function analyses".
///
/// Note that you must provide an explicit instantiation declaration and
/// definition for this template in order to get the correct behavior on
@ -371,17 +365,18 @@ template <typename IRUnitT> AnalysisSetKey AllAnalysesOn<IRUnitT>::SetKey;
extern template class AllAnalysesOn<Module>;
extern template class AllAnalysesOn<Function>;
/// \brief Manages a sequence of passes over units of IR.
/// \brief Manages a sequence of passes over a particular unit of IR.
///
/// A pass manager contains a sequence of passes to run over units of IR. It is
/// itself a valid pass over that unit of IR, and when over some given IR will
/// run each pass in sequence. This is the primary and most basic building
/// block of a pass pipeline.
/// A pass manager contains a sequence of passes to run over a particular unit
/// of IR (e.g. Functions, Modules). It is itself a valid pass over that unit of
/// IR, and when run over some given IR will run each of its contained passes in
/// sequence. Pass managers are the primary and most basic building block of a
/// pass pipeline.
///
/// If it is run with an \c AnalysisManager<IRUnitT> argument, it will propagate
/// that analysis manager to each pass it runs, as well as calling the analysis
/// manager's invalidation routine with the PreservedAnalyses of each pass it
/// runs.
/// When you run a pass manager, you provide an \c AnalysisManager<IRUnitT>
/// argument. The pass manager will propagate that analysis manager to each
/// pass it runs, and will call the analysis manager's invalidation routine with
/// the PreservedAnalyses of each pass it runs.
template <typename IRUnitT,
typename AnalysisManagerT = AnalysisManager<IRUnitT>,
typename... ExtraArgTs>
@ -390,7 +385,7 @@ class PassManager : public PassInfoMixin<
public:
/// \brief Construct a pass manager.
///
/// It can be passed a flag to get debug logging as the passes are run.
/// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
explicit PassManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
// FIXME: These are equivalent to the default move constructor/move
@ -400,13 +395,15 @@ public:
PassManager(PassManager &&Arg)
: Passes(std::move(Arg.Passes)),
DebugLogging(std::move(Arg.DebugLogging)) {}
PassManager &operator=(PassManager &&RHS) {
Passes = std::move(RHS.Passes);
DebugLogging = std::move(RHS.DebugLogging);
return *this;
}
/// \brief Run all of the passes in this manager over the IR.
/// \brief Run all of the passes in this manager over the given unit of IR.
/// ExtraArgs are passed to each pass.
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
ExtraArgTs... ExtraArgs) {
PreservedAnalyses PA = PreservedAnalyses::all();
@ -425,7 +422,7 @@ public:
// invalidates analyses.
AM.invalidate(IR, PassPA);
// Finally, we intersect the preserved analyses to compute the aggregate
// Finally, intersect the preserved analyses to compute the aggregate
// preserved set for this pass manager.
PA.intersect(std::move(PassPA));
@ -473,30 +470,29 @@ extern template class PassManager<Function>;
/// \brief Convenience typedef for a pass manager over functions.
typedef PassManager<Function> FunctionPassManager;
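
A short sketch of assembling and running a function pipeline; SimplifyCFGPass is a real pass, and the analysis registration step is elided:

FunctionPassManager FPM(/*DebugLogging=*/true);
FPM.addPass(SimplifyCFGPass());
FunctionAnalysisManager FAM;
// ... register analyses on FAM before running ...
PreservedAnalyses PA = FPM.run(F, FAM);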
/// \brief A generic analysis pass manager with lazy running and caching of
/// \brief A container for analyses that lazily runs them and caches their
/// results.
///
/// This analysis manager can be used for any IR unit where the address of the
/// IR unit suffices as its identity. It manages the cache for a unit of IR via
/// the address of each unit of IR cached.
/// This class can manage analyses for any IR unit where the address of the IR
/// unit suffices as its identity.
template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager {
public:
class Invalidator;
private:
// Now that we've defined our invalidator, we can build types for the concept
// types.
// Now that we've defined our invalidator, we can define the concept types.
typedef detail::AnalysisResultConcept<IRUnitT, PreservedAnalyses, Invalidator>
ResultConceptT;
typedef detail::AnalysisPassConcept<IRUnitT, PreservedAnalyses, Invalidator,
ExtraArgTs...>
PassConceptT;
/// \brief List of function analysis pass IDs and associated concept pointers.
/// \brief List of analysis pass IDs and associated concept pointers.
///
/// Requires iterators to be valid across appending new entries and arbitrary
/// erases. Provides the analysis ID to enable finding iterators to a given entry
/// in maps below, and provides the storage for the actual result concept.
/// erases. Provides the analysis ID to enable finding iterators to a given
/// entry in maps below, and provides the storage for the actual result
/// concept.
typedef std::list<std::pair<AnalysisKey *, std::unique_ptr<ResultConceptT>>>
AnalysisResultListT;
@ -504,8 +500,8 @@ private:
typedef DenseMap<IRUnitT *, AnalysisResultListT> AnalysisResultListMapT;
/// \brief Map type from a pair of analysis ID and IRUnitT pointer to an
/// iterator into a particular result list which is where the actual result
/// is stored.
/// iterator into a particular result list (which is where the actual analysis
/// result is stored).
typedef DenseMap<std::pair<AnalysisKey *, IRUnitT *>,
typename AnalysisResultListT::iterator>
AnalysisResultMapT;
@ -515,28 +511,28 @@ public:
///
/// When an analysis result embeds handles to other analysis results, it
/// needs to be invalidated both when its own information isn't preserved and
/// if any of those embedded analysis results end up invalidated. We pass in
/// an \c Invalidator object from the analysis manager in order to let the
/// analysis results themselves define the dependency graph on the fly. This
/// avoids building an explicit data structure representation of the
/// when any of its embedded analysis results end up invalidated. We pass an
/// \c Invalidator object as an argument to \c invalidate() in order to let
/// the analysis results themselves define the dependency graph on the fly.
/// This lets us avoid building an explicit representation of the
/// dependencies between analysis results.
class Invalidator {
public:
/// Trigger the invalidation of some other analysis pass if not already
/// handled and return whether it will in fact be invalidated.
/// handled and return whether it was in fact invalidated.
///
/// This is expected to be called from within a given analysis result's \c
/// invalidate method to trigger a depth-first walk of all inter-analysis
/// dependencies. The same \p IR unit and \p PA passed to that result's \c
/// invalidate method should in turn be provided to this routine.
///
/// The first time this is called for a given analysis pass, it will
/// trigger the corresponding result's \c invalidate method to be called.
/// Subsequent calls will use a cache of the results of that initial call.
/// It is an error to form cyclic dependencies between analysis results.
/// The first time this is called for a given analysis pass, it will call
/// the corresponding result's \c invalidate method. Subsequent calls will
/// use a cache of the results of that initial call. It is an error to form
/// cyclic dependencies between analysis results.
///
/// This returns true if the given analysis pass's result is invalid and
/// any dependencies on it will become invalid as a result.
/// This returns true if the given analysis's result is invalid. Any
/// dependencies on it will become invalid as a result.
template <typename PassT>
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
typedef detail::AnalysisResultModel<IRUnitT, PassT,
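
A sketch of the pattern, assuming a hypothetical MyAnalysis whose result holds a handle to the dominator tree:

bool MyResult::invalidate(Function &F, const PreservedAnalyses &PA,
                          FunctionAnalysisManager::Invalidator &Inv) {
  auto PAC = PA.getChecker<MyAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    return true; // we ourselves were not preserved
  // Also invalid if the dominator tree we depend on was invalidated.
  return Inv.invalidate<DominatorTreeAnalysis>(F, PA);
}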
@ -577,10 +573,10 @@ public:
auto &Result = static_cast<ResultT &>(*RI->second->second);
// Insert into the map whether the result should be invalidated and
// return that. Note that we cannot re-use IMapI and must do a fresh
// insert here as calling the invalidate routine could (recursively)
// insert things into the map making any iterator or reference invalid.
// Insert into the map whether the result should be invalidated and return
// that. Note that we cannot reuse IMapI and must do a fresh insert here,
// as calling invalidate could (recursively) insert things into the map,
// making any iterator or reference invalid.
bool Inserted;
std::tie(IMapI, Inserted) =
IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, *this)});
@ -600,8 +596,7 @@ public:
/// \brief Construct an empty analysis manager.
///
/// A flag can be passed to indicate that the manager should perform debug
/// logging.
/// If \p DebugLogging is true, we'll log our progress to llvm::dbgs().
AnalysisManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
AnalysisManager(AnalysisManager &&) = default;
AnalysisManager &operator=(AnalysisManager &&) = default;
@ -614,11 +609,11 @@ public:
return AnalysisResults.empty();
}
/// \brief Clear any results for a single unit of IR.
/// \brief Clear any cached analysis results for a single unit of IR.
///
/// This doesn't invalidate but directly clears the results. It is useful
/// when the IR is being removed and we want to clear out all the memory
/// pinned for it.
/// This doesn't invalidate, but instead simply deletes, the relevant results.
/// It is useful when the IR is being removed and we want to clear out all the
/// memory pinned for it.
void clear(IRUnitT &IR) {
if (DebugLogging)
dbgs() << "Clearing all analysis results for: " << IR.getName() << "\n";
@ -626,7 +621,7 @@ public:
auto ResultsListI = AnalysisResultLists.find(&IR);
if (ResultsListI == AnalysisResultLists.end())
return;
// Clear the map pointing into the results list.
// Delete the map entries that point into the results list.
for (auto &IDAndResult : ResultsListI->second)
AnalysisResults.erase({IDAndResult.first, &IR});
@ -634,21 +629,20 @@ public:
AnalysisResultLists.erase(ResultsListI);
}
/// \brief Clear the analysis result cache.
/// \brief Clear all analysis results cached by this AnalysisManager.
///
/// This routine allows cleaning up when the set of IR units itself has
/// potentially changed, and thus we can't even look up a result and
/// invalidate it directly. Notably, this does *not* call invalidate
/// functions as there is nothing to be done for them.
/// Like \c clear(IRUnitT&), this doesn't invalidate the results; it simply
/// deletes them. This lets you clean up the AnalysisManager when the set of
/// IR units itself has potentially changed, and thus we can't even look up
/// a result and invalidate/clear it directly.
void clear() {
AnalysisResults.clear();
AnalysisResultLists.clear();
}
/// \brief Get the result of an analysis pass for this module.
/// \brief Get the result of an analysis pass for a given IR unit.
///
/// If there is not a valid cached result in the manager already, this will
/// re-run the analysis to produce a valid result.
/// Runs the analysis if a cached result is not available.
template <typename PassT>
typename PassT::Result &getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs) {
assert(AnalysisPasses.count(PassT::ID()) &&
@ -661,7 +655,7 @@ public:
return static_cast<ResultModelT &>(ResultConcept).Result;
}
/// \brief Get the cached result of an analysis pass for this module.
/// \brief Get the cached result of an analysis pass for a given IR unit.
///
/// This method never runs the analysis.
///
@ -683,22 +677,21 @@ public:
/// \brief Register an analysis pass with the manager.
///
/// The argument is a callable whose result is a pass. This allows passing in
/// a lambda to construct the pass.
/// The parameter is a callable whose result is an analysis pass. This allows
/// passing in a lambda to construct the analysis.
///
/// The pass type registered is the result type of calling the argument. If
/// that pass has already been registered, then the argument will not be
/// called and this function will return false. Otherwise, the pass type
/// becomes registered, with the instance provided by calling the argument
/// once, and this function returns true.
/// The analysis type to register is the type returned by calling the \c
/// PassBuilder argument. If that type has already been registered, then the
/// argument will not be called and this function will return false.
/// Otherwise, we register the analysis returned by calling \c PassBuilder(),
/// and this function returns true.
///
/// While this returns whether or not the pass type was already registered,
/// there in't an independent way to query that as that would be prone to
/// risky use when *querying* the analysis manager. Instead, the only
/// supported use case is avoiding duplicate registry of an analysis. This
/// interface also lends itself to minimizing the number of times we have to
/// do lookups for analyses or construct complex passes only to throw them
/// away.
/// (Note: Although the return value of this function indicates whether or not
/// an analysis was previously registered, there intentionally isn't a way to
/// query this directly. Instead, you should just register all the analyses
/// you might want and let this class run them lazily. This idiom lets us
/// minimize the number of times we have to look up analyses in our
/// hashtable.)
template <typename PassBuilderT>
bool registerPass(PassBuilderT &&PassBuilder) {
typedef decltype(PassBuilder()) PassT;
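
For example, duplicate registration is a cheap no-op:

FunctionAnalysisManager FAM;
bool First = FAM.registerPass([] { return DominatorTreeAnalysis(); });
bool Second = FAM.registerPass([] { return DominatorTreeAnalysis(); });
// First is true; Second is false and the lambda was not invoked again.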
@ -718,17 +711,18 @@ public:
/// \brief Invalidate a specific analysis pass for an IR module.
///
/// Note that the analysis result can disregard invalidation.
/// Note that the analysis result can disregard invalidation, if it determines
/// it is in fact still valid.
template <typename PassT> void invalidate(IRUnitT &IR) {
assert(AnalysisPasses.count(PassT::ID()) &&
"This analysis pass was not registered prior to being invalidated");
invalidateImpl(PassT::ID(), IR);
}
/// \brief Invalidate analyses cached for an IR unit.
/// \brief Invalidate cached analyses for an IR unit.
///
/// Walk through all of the analyses pertaining to this unit of IR and
/// invalidate them unless they are preserved by the PreservedAnalyses set.
/// invalidate them, unless they are preserved by the PreservedAnalyses set.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
// We're done if all analyses on this IR unit are preserved.
if (PA.allAnalysesInSetPreserved<AllAnalysesOn<IRUnitT>>())
@ -738,8 +732,8 @@ public:
dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName()
<< "\n";
// Track whether each pass's result is invalidated. Memoize the results
// using the IsResultInvalidated map.
// Track whether each analysis's result is invalidated in
// IsResultInvalidated.
SmallDenseMap<AnalysisKey *, bool, 8> IsResultInvalidated;
Invalidator Inv(IsResultInvalidated, AnalysisResults);
AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
@ -758,9 +752,9 @@ public:
// Try to invalidate the result, giving it the Invalidator so it can
// recursively query for any dependencies it has and record the result.
// Note that we cannot re-use 'IMapI' here or pre-insert the ID as the
// invalidate method may insert things into the map as well, invalidating
// any iterator or pointer.
// Note that we cannot reuse 'IMapI' here or pre-insert the ID, as
// Result.invalidate may insert things into the map, invalidating our
// iterator.
bool Inserted =
IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, Inv)})
.second;
@ -873,7 +867,7 @@ private:
/// analysis result.
AnalysisResultMapT AnalysisResults;
/// \brief A flag indicating whether debug logging is enabled.
/// \brief Indicates whether we log to \c llvm::dbgs().
bool DebugLogging;
};

View File

@ -382,6 +382,10 @@ private:
/// The unmangled name of the global.
std::string IRName;
/// Keep track if the symbol is visible outside of ThinLTO (i.e. in
/// either a regular object or the regular LTO partition).
bool VisibleOutsideThinLTO = false;
bool UnnamedAddr = true;
/// This field keeps track of the partition number of this global. The
@ -405,6 +409,9 @@ private:
/// This global is either used by more than one partition or has an
/// external reference, and therefore cannot be internalized.
External = -2u,
/// The RegularLTO partition
RegularLTO = 0,
};
};

View File

@ -11,6 +11,7 @@
#define LLVM_MC_MCTARGETOPTIONS_H
#include <string>
#include <vector>
namespace llvm {
@ -51,11 +52,17 @@ public:
bool PreserveAsmComments : 1;
int DwarfVersion;
/// getABIName - If this returns a non-empty string this represents the
/// textual name of the ABI that we want the backend to use, e.g. o32, or
/// aapcs-linux.
StringRef getABIName() const;
std::string ABIName;
/// Additional paths to search for `.include` directives when using the
/// integrated assembler.
std::vector<std::string> IASSearchPaths;
MCTargetOptions();
};
@ -75,7 +82,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) {
ARE_EQUAL(ShowMCInst) &&
ARE_EQUAL(AsmVerbose) &&
ARE_EQUAL(DwarfVersion) &&
ARE_EQUAL(ABIName));
ARE_EQUAL(ABIName) &&
ARE_EQUAL(IASSearchPaths));
#undef ARE_EQUAL
}

View File

@ -27,7 +27,6 @@
#ifndef LLVM_SUPPORT_FILESYSTEM_H
#define LLVM_SUPPORT_FILESYSTEM_H
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@ -37,6 +36,7 @@
#include <cassert>
#include <cstdint>
#include <ctime>
#include <memory>
#include <stack>
#include <string>
#include <system_error>
@ -829,28 +829,23 @@ public:
};
namespace detail {
/// RecDirIterState - Keeps state for the recursive_directory_iterator. It is
/// reference counted in order to preserve InputIterator semantics on copy.
struct RecDirIterState : public RefCountedBase<RecDirIterState> {
RecDirIterState()
: Level(0)
, HasNoPushRequest(false) {}
/// Keeps state for the recursive_directory_iterator.
struct RecDirIterState {
std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
uint16_t Level;
bool HasNoPushRequest;
uint16_t Level = 0;
bool HasNoPushRequest = false;
};
} // end namespace detail
/// recursive_directory_iterator - Same as directory_iterator except for it
/// recurses down into child directories.
class recursive_directory_iterator {
IntrusiveRefCntPtr<detail::RecDirIterState> State;
std::shared_ptr<detail::RecDirIterState> State;
public:
recursive_directory_iterator() = default;
explicit recursive_directory_iterator(const Twine &path, std::error_code &ec)
: State(new detail::RecDirIterState) {
: State(std::make_shared<detail::RecDirIterState>()) {
State->Stack.push(directory_iterator(path, ec));
if (State->Stack.top() == directory_iterator())
State.reset();
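
Typical use is unchanged by the switch to shared_ptr; a sketch with a placeholder path:

std::error_code EC;
for (sys::fs::recursive_directory_iterator I("/some/dir", EC), E;
     I != E && !EC; I.increment(EC))
  llvm::outs() << I->path() << "\n";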

View File

@ -0,0 +1,32 @@
//===-- llvm/Support/TarWriter.h - Tar archive file creator -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_TAR_WRITER_H
#define LLVM_SUPPORT_TAR_WRITER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class TarWriter {
public:
static Expected<std::unique_ptr<TarWriter>> create(StringRef OutputPath,
StringRef BaseDir);
void append(StringRef Path, StringRef Data);
private:
TarWriter(int FD, StringRef BaseDir);
raw_fd_ostream OS;
std::string BaseDir;
};
}
#endif
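
A hypothetical use of the new interface, inside a function returning llvm::Error; the paths and contents are made up:

Expected<std::unique_ptr<TarWriter>> TarOrErr =
    TarWriter::create("repro.tar", "repro");
if (!TarOrErr)
  return TarOrErr.takeError();
(*TarOrErr)->append("response.txt", "--some-flag\n");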

View File

@ -86,11 +86,15 @@ public:
/// \p ExportLists contains for each Module the set of globals (GUID) that will
/// be imported by another module, or referenced by such a function. I.e. this
/// is the set of globals that need to be promoted/renamed appropriately.
///
/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and
/// will be ignored for the purpose of importing.
void ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
StringMap<FunctionImporter::ExportSetTy> &ExportLists);
StringMap<FunctionImporter::ExportSetTy> &ExportLists,
const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr);
/// Compute all the imports for the given module using the Index.
///
@ -100,6 +104,13 @@ void ComputeCrossModuleImportForModule(
StringRef ModulePath, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList);
/// Compute all the symbols that are "dead": i.e. those that can't be reached
/// in the graph from any of the given symbols listed in
/// \p GUIDPreservedSymbols.
DenseSet<GlobalValue::GUID>
computeDeadSymbols(const ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
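
A sketch of how a ThinLTO link might chain the two interfaces, assuming Index, GUIDPreservedSymbols, and ModuleToDefinedGVSummaries are in scope:

DenseSet<GlobalValue::GUID> DeadSymbols =
    computeDeadSymbols(Index, GUIDPreservedSymbols);
StringMap<FunctionImporter::ImportMapTy> ImportLists;
StringMap<FunctionImporter::ExportSetTy> ExportLists;
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                         ExportLists, &DeadSymbols);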
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
//

View File

@ -60,10 +60,6 @@ struct BitSetInfo {
bool containsGlobalOffset(uint64_t Offset) const;
bool containsValue(const DataLayout &DL,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout,
Value *V, uint64_t COffset = 0) const;
void print(raw_ostream &OS) const;
};

View File

@ -40,9 +40,20 @@ class FunctionImportGlobalProcessing {
/// as part of a different backend compilation process.
bool HasExportedFunctions = false;
/// Set of llvm.*used values, in order to validate that we don't try
/// to promote any non-renamable values.
SmallPtrSet<GlobalValue *, 8> Used;
/// Check if we should promote the given local value to global scope.
bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
#ifndef NDEBUG
/// Check if the given value is a local that can't be renamed (promoted).
/// Only used in assertion checking, and disabled under NDEBUG since the Used
/// set will not be populated.
bool isNonRenamableLocal(const GlobalValue &GV) const;
#endif
/// Helper methods to check if we are importing from or potentially
/// exporting from the current source module.
bool isPerformingImport() const { return GlobalsToImport != nullptr; }
@ -82,6 +93,13 @@ public:
// may be exported to another backend compilation.
if (!GlobalsToImport)
HasExportedFunctions = ImportIndex.hasExportedFunctions(M);
#ifndef NDEBUG
// First collect those in the llvm.used set.
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
// Next collect those in the llvm.compiler.used set.
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
#endif
}
bool run();

View File

@ -151,6 +151,7 @@ module LLVM_intrinsic_gen {
module IR_NoFolder { header "IR/NoFolder.h" export * }
module IR_Module { header "IR/Module.h" export * }
module IR_ModuleSummaryIndex { header "IR/ModuleSummaryIndex.h" export * }
module IR_ModuleSummaryIndexYAML { header "IR/ModuleSummaryIndexYAML.h" export * }
module IR_Function { header "IR/Function.h" export * }
module IR_InstrTypes { header "IR/InstrTypes.h" export * }
module IR_Instructions { header "IR/Instructions.h" export * }

View File

@ -80,10 +80,15 @@ static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
return CalleeInfo::HotnessType::None;
}
static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI,
bool HasLocalsInUsed) {
static bool isNonRenamableLocal(const GlobalValue &GV) {
return GV.hasSection() && GV.hasLocalLinkage();
}
static void
computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, bool HasLocalsInUsed,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
// Summary not currently supported for anonymous functions; they should
// have been named.
assert(F.hasName());
@ -178,37 +183,64 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
GlobalValueSummary::GVFlags Flags(F);
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
// Inliner doesn't handle variadic functions.
// FIXME: refactor this to use the same code that the inliner is using.
F.isVarArg();
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
/* LiveRoot = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
TypeTests.takeVector());
if (HasInlineAsmMaybeReferencingInternal)
FuncSummary->setHasInlineAsmMaybeReferencingInternal();
if (NonRenamableLocal)
CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
static void computeVariableSummary(ModuleSummaryIndex &Index,
const GlobalVariable &V) {
static void
computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
GlobalValueSummary::GVFlags Flags(V);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
/* LiveRoot = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
}
static void computeAliasSummary(ModuleSummaryIndex &Index,
const GlobalAlias &A) {
GlobalValueSummary::GVFlags Flags(A);
static void
computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
/* LiveRoot = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
AS->setAliasee(AliaseeSummary);
if (NonRenamableLocal)
CantBePromoted.insert(A.getGUID());
Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
// Set LiveRoot flag on entries matching the given value name.
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
auto SummaryList =
Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name));
if (SummaryList == Index.end())
return;
for (auto &Summary : SummaryList->second)
Summary->setLiveRoot();
}
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
const Module &M,
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
@ -226,9 +258,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
// Next collect those in the llvm.compiler.used set.
collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
DenseSet<GlobalValue::GUID> CantBePromoted;
for (auto *V : Used) {
if (V->hasLocalLinkage())
if (V->hasLocalLinkage()) {
LocalsUsed.insert(V);
CantBePromoted.insert(V->getGUID());
}
}
// Compute summaries for all functions defined in the module, and save in the
@ -248,7 +283,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
BFI = BFIPtr.get();
}
computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(),
CantBePromoted);
}
// Compute summaries for all variables defined in the module, and save in the
@ -256,20 +292,29 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
computeVariableSummary(Index, G);
computeVariableSummary(Index, G, CantBePromoted);
}
// Compute summaries for all aliases defined in the module, and save in the
// index.
for (const GlobalAlias &A : M.aliases())
computeAliasSummary(Index, A);
computeAliasSummary(Index, A, CantBePromoted);
for (auto *V : LocalsUsed) {
auto *Summary = Index.getGlobalValueSummary(*V);
assert(Summary && "Missing summary for global value");
Summary->setNoRename();
Summary->setNotEligibleToImport();
}
// The linker doesn't know about these LLVM-produced values, so we need
// to flag them as live in the index to ensure index-based dead value
// analysis treats them as live roots of the analysis.
setLiveRoot(Index, "llvm.used");
setLiveRoot(Index, "llvm.compiler.used");
setLiveRoot(Index, "llvm.global_ctors");
setLiveRoot(Index, "llvm.global_dtors");
setLiveRoot(Index, "llvm.global.annotations");
if (!M.getModuleInlineAsm().empty()) {
// Collect the local values defined by module-level asm, and set up
// summaries for these symbols so that they can be marked as NoRename,
@ -282,7 +327,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// referenced from there.
ModuleSymbolTable::CollectAsmSymbols(
Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
[&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
[&M, &Index, &CantBePromoted](StringRef Name,
object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.
if (Flags & (object::BasicSymbolRef::SF_Weak |
object::BasicSymbolRef::SF_Global))
@ -291,11 +337,10 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (!GV)
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
GlobalValueSummary::GVFlags GVFlags(
GlobalValue::InternalLinkage,
/* NoRename */ true,
/* HasInlineAsmMaybeReferencingInternal */ false,
/* IsNotViableToInline */ true);
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
/* NotEligibleToImport */ true,
/* LiveRoot */ true);
CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
@ -303,18 +348,41 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{});
Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(GVFlags,
ArrayRef<ValueInfo>{});
Summary->setNoRename();
Index.addGlobalValueSummary(Name, std::move(Summary));
}
});
}
for (auto &GlobalList : Index) {
assert(GlobalList.second.size() == 1 &&
"Expected module's index to have one summary per GUID");
auto &Summary = GlobalList.second[0];
bool AllRefsCanBeExternallyReferenced =
llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
return !CantBePromoted.count(VI.getValue()->getGUID());
});
if (!AllRefsCanBeExternallyReferenced) {
Summary->setNotEligibleToImport();
continue;
}
if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
bool AllCallsCanBeExternallyReferenced = llvm::all_of(
FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
auto GUID = Edge.first.isGUID() ? Edge.first.getGUID()
: Edge.first.getValue()->getGUID();
return !CantBePromoted.count(GUID);
});
if (!AllCallsCanBeExternallyReferenced)
Summary->setNotEligibleToImport();
}
}
return Index;
}

View File

@ -389,8 +389,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
}
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
bool IsComplex) const {
int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
ScalarEvolution *SE,
const SCEV *Ptr) const {
int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
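A hedged sketch of what a target override can now do with the extra operands (MyTTIImpl is hypothetical, the costs are illustrative, and SCEVAddRecExpr comes from llvm/Analysis/ScalarEvolutionExpressions.h):

// Sketch: treat an affine add-recurrence (a simple induction pointer) as
// free, since it typically folds into the addressing mode.
int MyTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                         const SCEV *Ptr) {
  if (SE && Ptr)
    if (const auto *AR = dyn_cast<SCEVAddRecExpr>(Ptr))
      if (AR->isAffine())
        return 0;
  // Otherwise charge one instruction to materialize the address.
  return 1;
}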

View File

@ -801,12 +801,12 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
// to getDecodedLinkage() will need to be taken into account here as above.
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
bool NoRename = RawFlags & 0x1;
bool IsNotViableToInline = RawFlags & 0x2;
bool HasInlineAsmMaybeReferencingInternal = RawFlags & 0x4;
return GlobalValueSummary::GVFlags(Linkage, NoRename,
HasInlineAsmMaybeReferencingInternal,
IsNotViableToInline);
bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3;
// The LiveRoot flag wasn't introduced until version 3. For dead stripping
// to work correctly on earlier versions, we must conservatively treat all
// values as live.
bool LiveRoot = (RawFlags & 0x2) || Version < 3;
return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot);
}
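For reference, a worked sketch of the raw flag word this decoder implies; the placement of linkage in the low 4 bits is taken from the masks above, and the value 7 for InternalLinkage is an assumption:

// bits 0-3 : GlobalValue linkage
// bit 4    : NotEligibleToImport
// bit 5    : LiveRoot
// Example: internal linkage (7), not eligible to import, live root:
//   Raw     = 7 | (1 << 4) | (1 << 5) = 0x37
//   Linkage = 0x37 & 0xF = 7
//   After RawFlags >>= 4: bit 0 (NotEligibleToImport) and bit 1 (LiveRoot)
//   are both set.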
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@ -4838,9 +4838,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
if (!IsOldProfileFormat && Version != 2)
if (Version < 1 || Version > 3)
return error("Invalid summary version " + Twine(Version) +
", 1 or 2 expected");
", 1, 2 or 3 expected");
Record.clear();
// Keep around the last seen summary to be used when we see an optional

View File

@ -93,20 +93,29 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
}
/// skipRecord - Read the current record and discard it, returning the record code.
void BitstreamCursor::skipRecord(unsigned AbbrevID) {
unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
if (AbbrevID == bitc::UNABBREV_RECORD) {
unsigned Code = ReadVBR(6);
(void)Code;
unsigned NumElts = ReadVBR(6);
for (unsigned i = 0; i != NumElts; ++i)
(void)ReadVBR64(6);
return;
return Code;
}
const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
unsigned Code;
if (CodeOp.isLiteral())
Code = CodeOp.getLiteralValue();
else {
if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
report_fatal_error("Abbreviation starts with an Array or a Blob");
Code = readAbbreviatedField(*this, CodeOp);
}
for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral())
continue;
@ -164,6 +173,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip over the blob.
JumpToBit(NewEnd);
}
return Code;
}
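A hedged usage sketch of the new return value, mirroring the lazy-metadata indexing loop added later in this commit (Cursor, Entry, and Record are assumed to be set up as in that loop):

// Peek at a record's code cheaply, then rewind and fully parse only the
// records we actually need eagerly.
uint64_t SavedPos = Cursor.GetCurrentBitNo();
unsigned Code = Cursor.skipRecord(Entry.ID);
if (Code == bitc::METADATA_NAME) {
  Cursor.JumpToBit(SavedPos);
  Record.clear();
  Cursor.readRecord(Entry.ID, Record);
}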
unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
@ -273,7 +283,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
}
void BitstreamCursor::ReadAbbrevRecord() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
unsigned NumOpInfo = ReadVBR(5);
for (unsigned i = 0; i != NumOpInfo; ++i) {
bool IsLiteral = Read(1);
@ -307,7 +317,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
if (Abbv->getNumOperandInfos() == 0)
report_fatal_error("Abbrev record with no operands");
CurAbbrevs.push_back(Abbv);
CurAbbrevs.push_back(std::move(Abbv));
}
Optional<BitstreamBlockInfo>

View File

@ -14,10 +14,12 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@ -86,12 +88,23 @@
using namespace llvm;
#define DEBUG_TYPE "bitcode-reader"
STATISTIC(NumMDStringLoaded, "Number of MDStrings loaded");
STATISTIC(NumMDNodeTemporary, "Number of MDNode::Temporary created");
STATISTIC(NumMDRecordLoaded, "Number of Metadata records loaded");
/// Flag whether we need to import full type definitions for ThinLTO.
/// Currently needed for Darwin and LLDB.
static cl::opt<bool> ImportFullTypeDefinitions(
"import-full-type-definitions", cl::init(false), cl::Hidden,
cl::desc("Import full type definitions for ThinLTO."));
static cl::opt<bool> DisableLazyLoading(
"disable-ondemand-mds-loading", cl::init(false), cl::Hidden,
cl::desc("Force disable the lazy-loading on-demand of metadata when "
"loading bitcode for importing."));
namespace {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
@ -165,6 +178,10 @@ public:
void assignValue(Metadata *MD, unsigned Idx);
void tryToResolveCycles();
bool hasFwdRefs() const { return !ForwardReference.empty(); }
int getNextFwdRef() {
assert(hasFwdRefs());
return *ForwardReference.begin();
}
/// Upgrade a type that had an MDString reference.
void addTypeRef(MDString &UUID, DICompositeType &CT);
@ -215,6 +232,7 @@ Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
ForwardReference.insert(Idx);
// Create and return a placeholder, which will later be RAUW'd.
++NumMDNodeTemporary;
Metadata *MD = MDNode::getTemporary(Context, None).release();
MetadataPtrs[Idx].reset(MD);
return MD;
@ -340,8 +358,26 @@ class PlaceholderQueue {
std::deque<DistinctMDOperandPlaceholder> PHs;
public:
bool empty() { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
void flush(BitcodeReaderMetadataList &MetadataList);
/// Return the list of temporary nodes in the queue; these need to be
/// loaded before we can flush the queue.
void getTemporaries(BitcodeReaderMetadataList &MetadataList,
DenseSet<unsigned> &Temporaries) {
for (auto &PH : PHs) {
auto ID = PH.getID();
auto *MD = MetadataList.lookup(ID);
if (!MD) {
Temporaries.insert(ID);
continue;
}
auto *N = dyn_cast_or_null<MDNode>(MD);
if (N && N->isTemporary())
Temporaries.insert(ID);
}
}
};
} // end anonymous namespace
@ -375,6 +411,30 @@ class MetadataLoader::MetadataLoaderImpl {
Module &TheModule;
std::function<Type *(unsigned)> getTypeByID;
/// Cursor associated with the lazy-loading of Metadata. This is the easiest
/// way to keep around the right "context" (the abbrev list) so that we can
/// jump into the middle of the metadata block and load any record.
BitstreamCursor IndexCursor;
/// Index that keeps track of MDString values.
std::vector<StringRef> MDStringRef;
/// On-demand loading of a single MDString. Requires the index above to be
/// populated.
MDString *lazyLoadOneMDString(unsigned Idx);
/// Index that keeps track of where to find a metadata record in the stream.
std::vector<uint64_t> GlobalMetadataBitPosIndex;
/// Populate the index above to enable lazy loading of metadata, and load
/// the named metadata as well as the transitively referenced global
/// Metadata.
Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
/// On-demand loading of a single metadata record. Requires the index above
/// to be populated.
void lazyLoadOneMetadata(unsigned Idx, PlaceholderQueue &Placeholders);
// Keep a mapping of seen pairs of old-style CU <-> SP, and update pointers to
// point from SP to CU after a block is completely parsed.
std::vector<std::pair<DICompileUnit *, Metadata *>> CUSubprograms;
@ -394,13 +454,25 @@ class MetadataLoader::MetadataLoaderImpl {
Error parseOneMetadata(SmallVectorImpl<uint64_t> &Record, unsigned Code,
PlaceholderQueue &Placeholders, StringRef Blob,
bool ModuleLevel, unsigned &NextMetadataNo);
unsigned &NextMetadataNo);
Error parseMetadataStrings(ArrayRef<uint64_t> Record, StringRef Blob,
unsigned &NextMetadataNo);
std::function<void(StringRef)> CallBack);
Error parseGlobalObjectAttachment(GlobalObject &GO,
ArrayRef<uint64_t> Record);
Error parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
void resolveForwardRefsAndPlaceholders(PlaceholderQueue &Placeholders);
/// Upgrade old-style CU <-> SP pointers to point from SP to CU.
void upgradeCUSubprograms() {
for (auto CU_SP : CUSubprograms)
if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
for (auto &Op : SPs->operands())
if (auto *SP = dyn_cast_or_null<MDNode>(Op))
SP->replaceOperandWith(7, CU_SP.first);
CUSubprograms.clear();
}
public:
MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
@ -444,20 +516,217 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
PlaceholderQueue &Placeholders) {
IndexCursor = Stream;
SmallVector<uint64_t, 64> Record;
// Get the abbrevs, and preload record positions to make them lazy-loadable.
while (true) {
BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock: {
return true;
}
case BitstreamEntry::Record: {
// The interesting case.
++NumMDRecordLoaded;
uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
auto Code = IndexCursor.skipRecord(Entry.ID);
switch (Code) {
case bitc::METADATA_STRINGS: {
// Rewind and parse the strings.
IndexCursor.JumpToBit(CurrentPos);
StringRef Blob;
Record.clear();
IndexCursor.readRecord(Entry.ID, Record, &Blob);
unsigned NumStrings = Record[0];
MDStringRef.reserve(NumStrings);
auto IndexNextMDString = [&](StringRef Str) {
MDStringRef.push_back(Str);
};
if (auto Err = parseMetadataStrings(Record, Blob, IndexNextMDString))
return std::move(Err);
break;
}
case bitc::METADATA_INDEX_OFFSET: {
// This is the offset to the index; when we see this we skip all the
// records and load only an index to them.
IndexCursor.JumpToBit(CurrentPos);
Record.clear();
IndexCursor.readRecord(Entry.ID, Record);
if (Record.size() != 2)
return error("Invalid record");
auto Offset = Record[0] + (Record[1] << 32);
auto BeginPos = IndexCursor.GetCurrentBitNo();
IndexCursor.JumpToBit(BeginPos + Offset);
Entry = IndexCursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
assert(Entry.Kind == BitstreamEntry::Record &&
"Corrupted bitcode: Expected `Record` when trying to find the "
"Metadata index");
Record.clear();
auto Code = IndexCursor.readRecord(Entry.ID, Record);
(void)Code;
assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected "
"`METADATA_INDEX` when trying "
"to find the Metadata index");
// Delta unpack
auto CurrentValue = BeginPos;
GlobalMetadataBitPosIndex.reserve(Record.size());
for (auto &Elt : Record) {
CurrentValue += Elt;
GlobalMetadataBitPosIndex.push_back(CurrentValue);
}
break;
}
case bitc::METADATA_INDEX:
// We don't expect to get here; the index is loaded when we encounter
// the offset.
return error("Corrupted Metadata block");
case bitc::METADATA_NAME: {
// Named metadata need to be materialized now and aren't deferred.
IndexCursor.JumpToBit(CurrentPos);
Record.clear();
unsigned Code = IndexCursor.readRecord(Entry.ID, Record);
assert(Code == bitc::METADATA_NAME);
// Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
Code = IndexCursor.ReadCode();
// Named metadata comes in two parts; we expect the name to be followed
// by the node.
Record.clear();
unsigned NextBitCode = IndexCursor.readRecord(Code, Record);
assert(NextBitCode == bitc::METADATA_NAMED_NODE);
(void)NextBitCode;
// Read named metadata elements.
unsigned Size = Record.size();
NamedMDNode *NMD = TheModule.getOrInsertNamedMetadata(Name);
for (unsigned i = 0; i != Size; ++i) {
// FIXME: We could use a placeholder here, however NamedMDNode takes
// MDNode as operand rather than using the Metadata infrastructure. This
// is acknowledged by 'TODO: Inherit from Metadata' in the NamedMDNode
// class definition.
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
assert(MD && "Invalid record");
NMD->addOperand(MD);
}
break;
}
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
// FIXME: we need to do this early because we don't materialize global
// values explicitly.
IndexCursor.JumpToBit(CurrentPos);
Record.clear();
IndexCursor.readRecord(Entry.ID, Record);
if (Record.size() % 2 == 0)
return error("Invalid record");
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size())
return error("Invalid record");
if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
if (Error Err = parseGlobalObjectAttachment(
*GO, ArrayRef<uint64_t>(Record).slice(1)))
return std::move(Err);
break;
}
case bitc::METADATA_KIND:
case bitc::METADATA_STRING_OLD:
case bitc::METADATA_OLD_FN_NODE:
case bitc::METADATA_OLD_NODE:
case bitc::METADATA_VALUE:
case bitc::METADATA_DISTINCT_NODE:
case bitc::METADATA_NODE:
case bitc::METADATA_LOCATION:
case bitc::METADATA_GENERIC_DEBUG:
case bitc::METADATA_SUBRANGE:
case bitc::METADATA_ENUMERATOR:
case bitc::METADATA_BASIC_TYPE:
case bitc::METADATA_DERIVED_TYPE:
case bitc::METADATA_COMPOSITE_TYPE:
case bitc::METADATA_SUBROUTINE_TYPE:
case bitc::METADATA_MODULE:
case bitc::METADATA_FILE:
case bitc::METADATA_COMPILE_UNIT:
case bitc::METADATA_SUBPROGRAM:
case bitc::METADATA_LEXICAL_BLOCK:
case bitc::METADATA_LEXICAL_BLOCK_FILE:
case bitc::METADATA_NAMESPACE:
case bitc::METADATA_MACRO:
case bitc::METADATA_MACRO_FILE:
case bitc::METADATA_TEMPLATE_TYPE:
case bitc::METADATA_TEMPLATE_VALUE:
case bitc::METADATA_GLOBAL_VAR:
case bitc::METADATA_LOCAL_VAR:
case bitc::METADATA_EXPRESSION:
case bitc::METADATA_OBJC_PROPERTY:
case bitc::METADATA_IMPORTED_ENTITY:
case bitc::METADATA_GLOBAL_VAR_EXPR:
// We don't expect to see any of these; if we see one, give up on
// lazy-loading and fall back.
MDStringRef.clear();
GlobalMetadataBitPosIndex.clear();
return false;
}
break;
}
}
}
}
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module-level metadata.
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
if (!ModuleLevel && MetadataList.hasFwdRefs())
return error("Invalid metadata: fwd refs into function blocks");
// Record the entry position so that we can jump back here and efficiently
// skip the whole block in case we lazy-load.
auto EntryPos = Stream.GetCurrentBitNo();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
return error("Invalid record");
unsigned NextMetadataNo = MetadataList.size();
SmallVector<uint64_t, 64> Record;
PlaceholderQueue Placeholders;
// We lazy-load module-level metadata: we build an index for each record, and
// then load individual records as needed, starting with the named metadata.
if (ModuleLevel && IsImporting && MetadataList.empty() &&
!DisableLazyLoading) {
auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
if (!SuccessOrErr)
return SuccessOrErr.takeError();
if (SuccessOrErr.get()) {
// An index was successfully created and we will be able to load metadata
// on-demand.
MetadataList.resize(MDStringRef.size() +
GlobalMetadataBitPosIndex.size());
// Reading the named metadata created forward references and/or
// placeholders, which we flush here.
resolveForwardRefsAndPlaceholders(Placeholders);
upgradeCUSubprograms();
// Rewind to the beginning of the block, since it is easy to skip it
// entirely from there.
Stream.ReadBlockEnd(); // Pop the abbrev block context.
Stream.JumpToBit(EntryPos);
if (Stream.SkipBlock())
return error("Invalid record");
return Error::success();
}
// Couldn't load an index; fall back to loading the whole block "old-style".
}
unsigned NextMetadataNo = MetadataList.size();
// Read all the records.
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@ -467,16 +736,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
// Upgrade old-style CU <-> SP pointers to point from SP to CU.
for (auto CU_SP : CUSubprograms)
if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
for (auto &Op : SPs->operands())
if (auto *SP = dyn_cast_or_null<MDNode>(Op))
SP->replaceOperandWith(7, CU_SP.first);
CUSubprograms.clear();
MetadataList.tryToResolveCycles();
Placeholders.flush(MetadataList);
resolveForwardRefsAndPlaceholders(Placeholders);
upgradeCUSubprograms();
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@ -486,20 +747,86 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read a record.
Record.clear();
StringRef Blob;
++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob,
ModuleLevel, NextMetadataNo))
if (Error Err =
parseOneMetadata(Record, Code, Placeholders, Blob, NextMetadataNo))
return Err;
}
}
MDString *MetadataLoader::MetadataLoaderImpl::lazyLoadOneMDString(unsigned ID) {
++NumMDStringLoaded;
if (Metadata *MD = MetadataList.lookup(ID))
return cast<MDString>(MD);
auto MDS = MDString::get(Context, MDStringRef[ID]);
MetadataList.assignValue(MDS, ID);
return MDS;
}
void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
unsigned ID, PlaceholderQueue &Placeholders) {
assert(ID < MDStringRef.size() + GlobalMetadataBitPosIndex.size());
assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
#ifndef NDEBUG
// Check first whether the metadata has already been loaded; if so it must
// only be a temporary placeholder.
if (auto *MD = MetadataList.lookup(ID)) {
auto *N = dyn_cast_or_null<MDNode>(MD);
assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
}
#endif
SmallVector<uint64_t, 64> Record;
StringRef Blob;
IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
auto Entry = IndexCursor.advanceSkippingSubblocks();
++NumMDRecordLoaded;
unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob);
if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ID))
report_fatal_error("Can't lazyload MD");
}
/// Ensure that all forward references and placeholders are resolved,
/// iteratively lazy-loading metadata on demand if needed.
void MetadataLoader::MetadataLoaderImpl::resolveForwardRefsAndPlaceholders(
PlaceholderQueue &Placeholders) {
DenseSet<unsigned> Temporaries;
while (true) {
// Populate Temporaries with the placeholders that haven't been loaded yet.
Placeholders.getTemporaries(MetadataList, Temporaries);
// If we don't have any temporaries or forward references, we're done!
if (Temporaries.empty() && !MetadataList.hasFwdRefs())
break;
// First, load all the temporaries. This can add new placeholders or
// forward references.
for (auto ID : Temporaries)
lazyLoadOneMetadata(ID, Placeholders);
Temporaries.clear();
// Second, load the forward-references. This can also add new placeholders
// or forward references.
while (MetadataList.hasFwdRefs())
lazyLoadOneMetadata(MetadataList.getNextFwdRef(), Placeholders);
}
// At this point no forward references remain and no temporaries are left
// unloaded. We can safely drop RAUW support and mark cycles as resolved.
MetadataList.tryToResolveCycles();
// Finally, everything is in place; we can replace the placeholder operands
// with the final nodes they refer to.
Placeholders.flush(MetadataList);
}
Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SmallVectorImpl<uint64_t> &Record, unsigned Code,
PlaceholderQueue &Placeholders, StringRef Blob, bool ModuleLevel,
unsigned &NextMetadataNo) {
PlaceholderQueue &Placeholders, StringRef Blob, unsigned &NextMetadataNo) {
bool IsDistinct = false;
auto getMD = [&](unsigned ID) -> Metadata * {
if (ID < MDStringRef.size())
return lazyLoadOneMDString(ID);
if (!IsDistinct)
return MetadataList.getMetadataFwdRef(ID);
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
@ -519,7 +846,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMDString = [&](unsigned ID) -> MDString * {
// This requires that the ID is not really a forward reference. In
// particular, the MDString must already have been resolved.
return cast_or_null<MDString>(getMDOrNull(ID));
auto MDS = getMDOrNull(ID);
return cast_or_null<MDString>(MDS);
};
// Support for old type refs.
@ -539,6 +867,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.clear();
Code = Stream.ReadCode();
++NumMDRecordLoaded;
unsigned NextBitCode = Stream.readRecord(Code, Record);
if (NextBitCode != bitc::METADATA_NAMED_NODE)
return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
@ -1137,15 +1466,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Test for upgrading !llvm.loop.
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
++NumMDStringLoaded;
Metadata *MD = MDString::get(Context, String);
MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
case bitc::METADATA_STRINGS:
if (Error Err = parseMetadataStrings(Record, Blob, NextMetadataNo))
case bitc::METADATA_STRINGS: {
auto CreateNextMDString = [&](StringRef Str) {
++NumMDStringLoaded;
MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
};
if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
return Err;
break;
}
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
if (Record.size() % 2 == 0)
return error("Invalid record");
@ -1166,12 +1500,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
}
#undef GET_OR_DISTINCT
return Error::success();
#undef GET_OR_DISTINCT
}
Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
ArrayRef<uint64_t> Record, StringRef Blob, unsigned &NextMetadataNo) {
ArrayRef<uint64_t> Record, StringRef Blob,
std::function<void(StringRef)> CallBack) {
// All the MDStrings in the block are emitted together in a single
// record. The strings are concatenated and stored in a blob along with
// their sizes.
@ -1197,8 +1532,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
NextMetadataNo++);
CallBack(Strings.slice(0, Size));
Strings = Strings.drop_front(Size);
} while (--NumStrings);
@ -1228,6 +1562,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
SmallVector<uint64_t, 64> Record;
PlaceholderQueue Placeholders;
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@ -1236,6 +1572,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
resolveForwardRefsAndPlaceholders(Placeholders);
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@ -1244,6 +1581,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
// Read a metadata attachment record.
Record.clear();
++NumMDRecordLoaded;
switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: ignore.
break;
@ -1268,7 +1606,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
if (I->second == LLVMContext::MD_tbaa && StripTBAA)
continue;
Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]);
auto Idx = Record[i + 1];
if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
!MetadataList.lookup(Idx))
// Load the attachment if it is in the lazy-loadable range and hasn't
// been loaded yet.
lazyLoadOneMetadata(Idx, Placeholders);
Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))
// Drop the attachment. This used to be legal, but there's no
// upgrade path.
@ -1331,6 +1676,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read a record.
Record.clear();
++NumMDRecordLoaded;
unsigned Code = Stream.readRecord(Entry.ID, Record);
switch (Code) {
default: // Default behavior: ignore.

View File

@ -784,53 +784,53 @@ void ModuleBitcodeWriter::writeTypeTable() {
uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_FUNCTION.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_ANON.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAME.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_STRUCT_NAMED.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit an entry count so the reader can reserve space.
TypeVals.push_back(TypeList.size());
@ -971,9 +971,8 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
RawFlags |= Flags.NoRename; // bool
RawFlags |= (Flags.IsNotViableToInline << 1);
RawFlags |= (Flags.HasInlineAsmMaybeReferencingInternal << 2);
RawFlags |= Flags.NotEligibleToImport; // bool
RawFlags |= (Flags.LiveRoot << 1);
// Linkage doesn't need to be remapped at that time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
@ -1059,13 +1058,13 @@ void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
// updated when the real VST is written.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET));
// Blocks are 32-bit aligned, so we can use a 32-bit word offset to
// hold the real VST offset. Must use fixed instead of VBR as we don't
// know how many VBR chunks to reserve ahead of time.
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv);
unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit the placeholder
uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0};
@ -1155,7 +1154,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
unsigned SimpleGVarAbbrev = 0;
if (!M.global_empty()) {
// Add an abbrev for common globals with no visibility or thread localness.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
@ -1177,7 +1176,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(SectionMap.size()+1)));
// Don't bother emitting vis + thread local.
SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// Emit the global variable information.
@ -1285,11 +1284,11 @@ void ModuleBitcodeWriter::writeModuleInfo() {
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
for (const auto P : M.getSourceFileName())
Vals.push_back((unsigned char)P);
@ -1360,14 +1359,14 @@ void ModuleBitcodeWriter::writeMDTuple(const MDTuple *N,
unsigned ModuleBitcodeWriter::createDILocationAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
return Stream.EmitAbbrev(Abbv);
return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
@ -1389,7 +1388,7 @@ void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
// Assume the column is usually under 128, and always output the inlined-at
// location (it's never more expensive than building an array size 1).
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
@ -1397,7 +1396,7 @@ unsigned ModuleBitcodeWriter::createGenericDINodeAbbrev() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
return Stream.EmitAbbrev(Abbv);
return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N,
@ -1790,11 +1789,11 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
}
unsigned ModuleBitcodeWriter::createNamedMetadataAbbrev() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
return Stream.EmitAbbrev(Abbv);
return Stream.EmitAbbrev(std::move(Abbv));
}
void ModuleBitcodeWriter::writeNamedMetadata(
@ -1819,12 +1818,12 @@ void ModuleBitcodeWriter::writeNamedMetadata(
}
unsigned ModuleBitcodeWriter::createMetadataStringsAbbrev() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
return Stream.EmitAbbrev(Abbv);
return Stream.EmitAbbrev(std::move(Abbv));
}
/// Write out a record for MDString.
@ -1918,17 +1917,17 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
createGenericDINodeAbbrev();
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
unsigned OffsetAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
unsigned IndexAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
@ -2125,30 +2124,30 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
// If this is a constant pool for the module, emit module-specific abbrevs.
if (isGlobal) {
// Abbrev for CST_CODE_AGGREGATE.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
AggregateAbbrev = Stream.EmitAbbrev(Abbv);
AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_STRING.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
String8Abbrev = Stream.EmitAbbrev(Abbv);
String8Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
CString7Abbrev = Stream.EmitAbbrev(Abbv);
CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for CST_CODE_CSTRING.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
CString6Abbrev = Stream.EmitAbbrev(Abbv);
CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv));
}
SmallVector<uint64_t, 64> Record;
@ -2858,39 +2857,39 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width VST_CODE_FNENTRY function strings.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 VST_CODE_FNENTRY function strings.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// FIXME: Change the name of this record as it is now used by
// the per-module index as well.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
}
// FIXME: Set up the abbrev; we know how many values there are!
@ -2984,11 +2983,11 @@ void IndexBitcodeWriter::writeCombinedValueSymbolTable() {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
unsigned EntryAbbrev = Stream.EmitAbbrev(Abbv);
unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
for (const auto &GVI : valueIds()) {
@ -3121,7 +3120,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
Stream.EnterBlockInfoBlock();
{ // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@ -3132,7 +3131,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // 7-bit fixed width VST_CODE_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@ -3142,7 +3141,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@ -3152,7 +3151,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // 6-bit char6 VST_CODE_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
@ -3165,7 +3164,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
{ // SETTYPE abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
VE.computeBitsRequiredForTypeIndicies()));
@ -3175,7 +3174,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INTEGER abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
@ -3184,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // CE_CAST abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
@ -3196,7 +3195,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // NULL abbrev for CONSTANTS_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) !=
CONSTANTS_NULL_Abbrev)
@ -3206,7 +3205,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
// FIXME: This should only use space for first-class types!
{ // INST_LOAD abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@ -3218,7 +3217,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@ -3228,7 +3227,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
@ -3239,7 +3238,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_CAST abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@ -3251,14 +3250,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_RET_VOID_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_RET abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
@ -3266,14 +3265,14 @@ void ModuleBitcodeWriter::writeBlockInfo() {
llvm_unreachable("Unexpected abbrev ordering!");
}
{ // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_UNREACHABLE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
{
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
@ -3296,38 +3295,38 @@ void IndexBitcodeWriter::writeModStrings() {
// TODO: See which abbrev sizes we actually need to emit
// 8-bit fixed-width MST_ENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv);
unsigned Abbrev8Bit = Stream.EmitAbbrev(std::move(Abbv));
// 7-bit fixed width MST_ENTRY strings.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv);
unsigned Abbrev7Bit = Stream.EmitAbbrev(std::move(Abbv));
// 6-bit char6 MST_ENTRY strings.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
unsigned Abbrev6Bit = Stream.EmitAbbrev(std::move(Abbv));
// Module hash: 160-bit SHA1. Optionally emitted after each MST_CODE_ENTRY.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
for (const auto &MPSE : Index.modulePaths()) {
@ -3435,7 +3434,7 @@ void ModuleBitcodeWriter::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some records are
// interpreted, like flags for instance.
static const uint64_t INDEX_VERSION = 2;
static const uint64_t INDEX_VERSION = 3;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@ -3450,7 +3449,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
}
// Abbrev for FS_PERMODULE.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@ -3459,10 +3458,10 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_PROFILE.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
@ -3471,24 +3470,24 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_ALIAS.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
@ -3542,7 +3541,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
// Abbrev for FS_COMBINED.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@ -3552,10 +3551,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSCallsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_PROFILE.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
@ -3565,26 +3564,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// Abbrev for FS_COMBINED_ALIAS.
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALIAS));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(Abbv);
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
@ -3702,19 +3701,19 @@ void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
auto StringAbbrev = Stream.EmitAbbrev(Abbv);
auto StringAbbrev = Stream.EmitAbbrev(std::move(Abbv));
writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
Abbv = new BitCodeAbbrev();
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
auto EpochAbbrev = Stream.EmitAbbrev(Abbv);
auto EpochAbbrev = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 1> Vals = {bitc::BITCODE_CURRENT_EPOCH};
Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev);
Stream.ExitBlock();


@ -53,7 +53,8 @@ void ARMException::beginFunction(const MachineFunction *MF) {
if (MoveType == AsmPrinter::CFI_M_Debug) {
if (!hasEmittedCFISections) {
Asm->OutStreamer->EmitCFISections(false, true);
if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}


@ -108,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
LastMI(nullptr), LastFn(0), Counter(~0U) {
isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
@ -264,6 +264,28 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
isCFIMoveForDebugging = true;
if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
break;
for (auto &F: M.getFunctionList()) {
// If the module contains any function with unwind data,
// .eh_frame has to be emitted.
// Ignore functions that won't get emitted.
if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) {
isCFIMoveForDebugging = false;
break;
}
}
break;
default:
isCFIMoveForDebugging = false;
break;
}
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:


@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
SourceMgr SrcMgr;
SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
SrcMgrDiagInfo DiagInfo;
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.


@ -137,7 +137,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
return;
if (!hasEmittedCFISections) {
if (Asm->needsCFIMoves() == AsmPrinter::CFI_M_Debug)
if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
hasEmittedCFISections = true;
}


@ -125,8 +125,11 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
if (!MBB) {
MBB = MF->CreateMachineBasicBlock();
MBB = MF->CreateMachineBasicBlock(&BB);
MF->push_back(MBB);
if (BB.hasAddressTaken())
MBB->setHasAddressTaken();
}
return *MBB;
}
@ -195,6 +198,45 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
bool IRTranslator::translateSwitch(const User &U,
MachineIRBuilder &MIRBuilder) {
// For now, just translate as a chain of conditional branches.
// FIXME: could we share most of the logic/code in
// SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
// At first sight, it seems most of the logic in there is independent of
// SelectionDAG-specifics and a lot of work went in to optimize switch
// lowering in there.
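// As an illustration, a switch with cases 1 and 2 is emitted roughly as:
//   bb.cur: %t1 = icmp eq %cond, 1 ; brcond %t1 -> bb.case1 ; br bb.f1
//   bb.f1:  %t2 = icmp eq %cond, 2 ; brcond %t2 -> bb.case2 ; br bb.f2
//   bb.f2:  br bb.default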
const SwitchInst &SwInst = cast<SwitchInst>(U);
const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
for (auto &CaseIt : SwInst.cases()) {
const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
MIRBuilder.buildBrCond(Tst, TrueBB);
CurBB.addSuccessor(&TrueBB);
MachineBasicBlock *FalseBB =
MF->CreateMachineBasicBlock(SwInst.getParent());
MF->push_back(FalseBB);
MIRBuilder.buildBr(*FalseBB);
CurBB.addSuccessor(FalseBB);
MIRBuilder.setMBB(*FalseBB);
}
// handle default case
MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
MIRBuilder.buildBr(DefaultBB);
MIRBuilder.getMBB().addSuccessor(&DefaultBB);
return true;
}
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);


@ -55,11 +55,10 @@ const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
unsigned NumRegBanks)
: RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx)
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
assert(!RegBanks[Idx]->isValid() &&
"RegisterBank should be invalid before initialization");
});
#endif // NDEBUG
}
RegisterBankInfo::~RegisterBankInfo() {
@ -70,13 +69,15 @@ RegisterBankInfo::~RegisterBankInfo() {
}
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
const RegisterBank &RegBank = getRegBank(Idx);
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
dbgs() << "Verify " << RegBank << '\n';
assert(RegBank.verify(TRI) && "RegBank is invalid");
});
}
#endif // NDEBUG
return true;
}


@ -1495,16 +1495,18 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(*TRI);
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
// Compute a set of registers which must not be killed by instructions in
// BB1: This is everything live-in to BB2.
DontKill.init(*TRI);
DontKill.addLiveIns(NextMBB);
if (MRI->tracksLiveness()) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
// Compute a set of registers which must not be killed by instructions in
// BB1: This is everything live-in to BB2.
DontKill.addLiveIns(NextMBB);
}
if (CvtMBB.pred_size() > 1) {
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
@ -1602,8 +1604,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(*TRI);
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
if (MRI->tracksLiveness()) {
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
}
DontKill.clear();
@ -1766,8 +1770,10 @@ bool IfConverter::IfConvertDiamondCommon(
// instructions. We start with BB1 live-ins so we have the live-out regs
// after tracking the BB1 instructions.
Redefs.init(*TRI);
Redefs.addLiveIns(MBB1);
Redefs.addLiveIns(MBB2);
if (MRI->tracksLiveness()) {
Redefs.addLiveIns(MBB1);
Redefs.addLiveIns(MBB2);
}
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
@ -1792,12 +1798,14 @@ bool IfConverter::IfConvertDiamondCommon(
// This is everything used+live in BB2 after the duplicated instructions. We
// can compute this set by simulating liveness backwards from the end of BB2.
DontKill.init(*TRI);
for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
DontKill.stepBackward(MI);
if (MRI->tracksLiveness()) {
for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
DontKill.stepBackward(MI);
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
Redefs.stepForward(MI, IgnoredClobbers);
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
Redefs.stepForward(MI, Dummy);
}
}
BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
MBB2.erase(MBB2.begin(), DI2);


@ -488,16 +488,16 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
}
// Print the live in registers.
const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
if (!MBB.livein_empty()) {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
if (MRI.tracksLiveness() && !MBB.livein_empty()) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
for (const auto &LI : MBB.liveins()) {
if (!First)
OS << ", ";
First = false;
printReg(LI.PhysReg, OS, TRI);
printReg(LI.PhysReg, OS, &TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}


@ -286,7 +286,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
for (const auto &LI : make_range(livein_begin(), livein_end())) {
for (const auto &LI : LiveIns) {
OS << ' ' << PrintReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
@ -1292,3 +1292,10 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
void MachineBasicBlock::clearLiveIns() {
LiveIns.clear();
}
MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
assert(getParent()->getProperties().hasProperty(
MachineFunctionProperties::Property::TracksLiveness) &&
"Liveness information is accurate");
return LiveIns.begin();
}


@ -566,7 +566,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
if (!MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs)) {
MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
@ -741,14 +741,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
for (const auto &LI : MBB->liveins()) {
if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
if (MRI->tracksLiveness()) {
for (const auto &LI : MBB->liveins()) {
if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
}
for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;


@ -48,11 +48,6 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
// It is not possible to use the register scavenger after late optimization
// passes that don't preserve accurate liveness information.
assert(MRI->tracksLiveness() &&
"Cannot use register scavenger with inaccurate liveness");
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();


@ -40,6 +40,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@ -7339,19 +7340,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
if (!Range)
return Op;
Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
if (!Lo->isNullValue())
ConstantRange CR = getConstantRangeFromMetadata(*Range);
if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
return Op;
Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
APInt Lo = CR.getUnsignedMin();
if (!Lo.isMinValue())
return Op;
APInt Hi = CR.getUnsignedMax();
unsigned Bits = Hi.getActiveBits();
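// For example, !range metadata describing [0, 256) gives Hi = 255 and
// Bits = 8, so the value is asserted to be zero-extended from i8.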
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
Op, DAG.getValueType(SmallVT));
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;


@ -299,11 +299,8 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const {
Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
}
DWARFDie Child = getFirstChild();
while (Child) {
for (auto Child: children())
Child.collectChildrenAddressRanges(Ranges);
Child = Child.getSibling();
}
}
bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {


@ -468,6 +468,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.HandleInt = Flags.handle_int;
Options.HandleSegv = Flags.handle_segv;
Options.HandleTerm = Flags.handle_term;
Options.HandleXfsz = Flags.handle_xfsz;
SetSignalHandler(Options);
if (Flags.minimize_crash_internal_step)


@ -91,6 +91,7 @@ FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.")
FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.")
FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.")
FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.")
FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.")
FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; "
"if 2, close stderr; if 3, close both. "
"Be careful, this will also close e.g. asan's stderr/stdout.")


@ -37,6 +37,9 @@ std::string DirPlusFile(const std::string &DirPath,
// Returns the name of the dir, similar to the 'dirname' utility.
std::string DirName(const std::string &FileName);
// Returns path to a TmpDir.
std::string TmpDir();
void DupAndCloseStderr();
void CloseStdout();


@ -83,6 +83,12 @@ std::string DirName(const std::string &FileName) {
return Res;
}
std::string TmpDir() {
if (auto Env = getenv("TMPDIR"))
return Env;
return "/tmp";
}
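// For example, running with TMPDIR=/var/tmp makes callers such as the
// merge logic place their temporary control files there instead of /tmp.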
} // namespace fuzzer
#endif // LIBFUZZER_POSIX


@ -277,6 +277,8 @@ std::string DirName(const std::string &FileName) {
return FileName.substr(0, LocationLen + DirLen);
}
std::string TmpDir() { return "TODO: implement TmpDir"; }
} // namespace fuzzer
#endif // LIBFUZZER_WINDOWS


@ -82,6 +82,7 @@ public:
static void StaticAlarmCallback();
static void StaticCrashSignalCallback();
static void StaticInterruptCallback();
static void StaticFileSizeExceedCallback();
void ExecuteCallback(const uint8_t *Data, size_t Size);
size_t RunOne(const uint8_t *Data, size_t Size);


@ -266,6 +266,11 @@ void Fuzzer::StaticInterruptCallback() {
F->InterruptCallback();
}
void Fuzzer::StaticFileSizeExceedCallback() {
Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid());
exit(1);
}
void Fuzzer::CrashCallback() {
Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid());
if (EF->__sanitizer_print_stack_trace)


@ -220,8 +220,8 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true);
Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
AllFiles.size(), NumFilesInFirstCorpus);
std::string CFPath =
"libFuzzerTemp." + std::to_string(GetPid()) + ".txt";
auto CFPath = DirPlusFile(TmpDir(),
"libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
// Write the control file.
RemoveFile(CFPath);
std::ofstream ControlFile(CFPath);
@ -229,6 +229,11 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
ControlFile << NumFilesInFirstCorpus << "\n";
for (auto &Path: AllFiles)
ControlFile << Path << "\n";
if (!ControlFile) {
Printf("MERGE-OUTER: failed to write to the control file: %s\n",
CFPath.c_str());
exit(1);
}
ControlFile.close();
// Execute the inner process until it passes.
@ -246,6 +251,9 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
// Read the control file and do the merge.
Merger M;
std::ifstream IF(CFPath);
IF.seekg(0, IF.end);
Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg());
IF.seekg(0, IF.beg);
M.ParseOrExit(IF, true);
IF.close();
std::vector<std::string> NewFiles;


@ -62,6 +62,7 @@ struct FuzzingOptions {
bool HandleInt = false;
bool HandleSegv = false;
bool HandleTerm = false;
bool HandleXfsz = false;
};
} // namespace fuzzer


@ -46,10 +46,6 @@ public:
void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
const uint8_t *Data2);
void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases);
int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
size_t DataSize);
int TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData, size_t DataSize);
@ -147,29 +143,6 @@ public:
size_t AutoDictAdds = 0;
};
int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
size_t DataSize) {
if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
const uint8_t *UnitData;
auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
int Res = 0;
const uint8_t *Beg = UnitData;
const uint8_t *End = Beg + UnitSize;
for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
Cur = (uint8_t *)SearchMemory(Cur, End - Cur, &PresentData, DataSize);
if (!Cur)
break;
size_t Pos = Cur - Beg;
assert(Pos < UnitSize);
AddMutation(Pos, DataSize, DesiredData);
AddMutation(Pos, DataSize, DesiredData + 1);
AddMutation(Pos, DataSize, DesiredData - 1);
Res++;
}
return Res;
}
int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
const uint8_t *DesiredData,
size_t DataSize) {
@ -206,26 +179,6 @@ void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
}
}
void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
uint64_t Val, size_t NumCases,
uint64_t *Cases) {
if (F->InFuzzingThread()) return;
size_t ValSize = ValSizeInBits / 8;
bool TryShort = IsTwoByteData(Val);
for (size_t i = 0; i < NumCases; i++)
TryShort &= IsTwoByteData(Cases[i]);
if (Options.Verbosity >= 3)
Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases,
TryShort);
for (size_t i = 0; i < NumCases; i++) {
TryToAddDesiredData(Val, Cases[i], ValSize);
if (TryShort)
TryToAddDesiredData(Val, Cases[i], 2);
}
}
static TraceState *TS;
void Fuzzer::StartTraceRecording() {


@ -41,6 +41,10 @@ static void InterruptHandler(int, siginfo_t *, void *) {
Fuzzer::StaticInterruptCallback();
}
static void FileSizeExceedHandler(int, siginfo_t *, void *) {
Fuzzer::StaticFileSizeExceedCallback();
}
static void SetSigaction(int signum,
void (*callback)(int, siginfo_t *, void *)) {
struct sigaction sigact;
@ -80,6 +84,8 @@ void SetSignalHandler(const FuzzingOptions& Options) {
SetSigaction(SIGILL, CrashHandler);
if (Options.HandleFpe)
SetSigaction(SIGFPE, CrashHandler);
if (Options.HandleXfsz)
SetSigaction(SIGXFSZ, FileSizeExceedHandler);
}
void SleepSeconds(int Seconds) {


@ -58,6 +58,7 @@ LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
if (HandlerOpt->HandleFpe)
Fuzzer::StaticCrashSignalCallback();
break;
// TODO: handle (Options.HandleXfsz)
}
return EXCEPTION_CONTINUE_SEARCH;
}


@ -44,3 +44,11 @@ MERGE_WITH_CRASH: MERGE-OUTER: 3 new files
# Check that we actually limit the size with max_len
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5
MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s)
# Check that we honor TMPDIR
RUN: TMPDIR=DIR_DOES_NOT_EXIST not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
# Check that we can report an error if file size exceeded
RUN: (ulimit -f 1; not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
SIGXFSZ: ERROR: libFuzzer: file size exceeded


@ -337,12 +337,21 @@ void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
if (Res.Prevailing)
GlobalRes.IRName = GV->getName();
}
// Set the partition to external if we know it is used elsewhere, e.g.
// it is visible to a regular object, is referenced from llvm.compiler_used,
// or was already recorded as being referenced from a different partition.
if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
GlobalRes.Partition != Partition))
GlobalRes.Partition != Partition)) {
GlobalRes.Partition = GlobalResolution::External;
else
} else
// First recorded reference, save the current partition.
GlobalRes.Partition = Partition;
// Flag as visible outside of ThinLTO if visible from a regular object or
// if this is a reference in the regular LTO partition.
GlobalRes.VisibleOutsideThinLTO |=
(Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO));
}
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
@ -848,6 +857,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
for (auto &Res : GlobalResolutions) {
if (Res.second.VisibleOutsideThinLTO &&
// IRName will be defined if we have seen the prevailing copy of
// this value. If not, no need to preserve any ThinLTO copies.
!Res.second.IRName.empty())
GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName));
}
auto DeadSymbols =
computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@ -856,12 +878,21 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (Conf.OptLevel > 0) {
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
ImportLists, ExportLists);
ImportLists, ExportLists, &DeadSymbols);
std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
if (!Res.second.IRName.empty() &&
Res.second.Partition == GlobalResolution::External)
// First check if the symbol was flagged as having external references.
if (Res.second.Partition != GlobalResolution::External)
continue;
// IRName will be defined if we have seen the prevailing copy of
// this value. If not, no need to mark as exported from a ThinLTO
// partition (and we can't get the GUID).
if (Res.second.IRName.empty())
continue;
auto GUID = GlobalValue::getGUID(Res.second.IRName);
// Mark exported unless index-based analysis determined it to be dead.
if (!DeadSymbols.count(GUID))
ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName));
}


@ -581,11 +581,18 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
ExportLists, &DeadSymbols);
// Resolve LinkOnce/Weak symbols.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@ -594,10 +601,6 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
thinLTOResolveWeakForLinkerModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Promote the exported values in the index, so that they are promoted
// in the module.
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
@ -623,11 +626,18 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
ExportLists, &DeadSymbols);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
@ -697,11 +707,14 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Compute "dead" symbols, we don't want to import/export these!
auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
ExportLists, &DeadSymbols);
auto &ExportList = ExportLists[ModuleIdentifier];
// Be friendly and don't nuke totally the module when the client didn't
@ -836,17 +849,20 @@ void ThinLTOCodeGenerator::run() {
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Convert the preserved symbols set from string to GUID, this is needed for
// computing the caching hash and the internalization.
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Compute "dead" symbols, we don't want to import/export these!
auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
// Convert the preserved symbols set from string to GUID, this is needed for
// computing the caching hash and the internalization.
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
ExportLists, &DeadSymbols);
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.


@ -205,7 +205,7 @@ APInt& APInt::operator++() {
/// This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
/// no further borrowing is neeeded or it runs out of "digits" in x. The result
/// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
/// In other words, if y > x then this function returns 1, otherwise 0.
/// @returns the borrow out of the subtraction


@ -90,6 +90,7 @@ add_llvm_library(LLVMSupport
StringSaver.cpp
StringRef.cpp
SystemUtils.cpp
TarWriter.cpp
TargetParser.cpp
ThreadPool.cpp
Timer.cpp


@ -474,15 +474,25 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model,
break;
// Skylake:
case 0x4e:
*Type = INTEL_COREI7; // "skylake-avx512"
*Subtype = INTEL_COREI7_SKYLAKE_AVX512;
break;
case 0x5e:
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
*Type = INTEL_COREI7; // "skylake"
*Subtype = INTEL_COREI7_SKYLAKE;
break;
// Skylake Xeon:
case 0x55:
*Type = INTEL_COREI7;
// Check that we really have AVX512
if (Features & (1 << FEATURE_AVX512)) {
*Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
} else {
*Subtype = INTEL_COREI7_SKYLAKE; // "skylake"
}
break;
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield

lib/Support/TarWriter.cpp (new file, 166 lines)

@ -0,0 +1,166 @@
//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// TarWriter class provides a feature to create a tar archive file.
//
// I put emphasis on simplicity over comprehensiveness when implementing this
// class because we don't need a full-fledged archive file generator in LLVM
// at the moment.
//
// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
// are stored using the PAX extension. The PAX header is standardized in
// POSIX.1-2001.
//
// The struct definition of UstarHeader is copied from
// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/TarWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
// Each file in an archive must be aligned to this block size.
static const int BlockSize = 512;
struct UstarHeader {
char Name[100];
char Mode[8];
char Uid[8];
char Gid[8];
char Size[12];
char Mtime[12];
char Checksum[8];
char TypeFlag;
char Linkname[100];
char Magic[6];
char Version[2];
char Uname[32];
char Gname[32];
char DevMajor[8];
char DevMinor[8];
char Prefix[155];
char Pad[12];
};
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
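// Note that the numeric fields above (Mode, Uid, Size, Mtime, Checksum)
// hold octal ASCII strings, which is why the writers below format with "%o".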
// A PAX attribute is in the form of "<length> <key>=<value>\n"
// where <length> is the length of the entire string including
// the length field itself. An example string is this.
//
// 25 ctime=1084839148.1212\n
//
// This function creates such a string.
static std::string formatPax(StringRef Key, StringRef Val) {
int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n"
// We need to compute total size twice because appending
// a length field could change total size by one.
int Total = Len + Twine(Len).str().size();
Total = Len + Twine(Total).str().size();
return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
}
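// For example, formatPax("path", "foo") returns "12 path=foo\n": Len is
// 4 + 3 + 3 = 10 and the two-digit length field brings the total to 12.
// The second pass matters near a digit boundary: Len = 98 first yields 100,
// whose extra digit pushes the final total to 101.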
// Headers in tar files must be aligned to 512 byte boundaries.
// This function forwards the current file position to the next boundary.
static void pad(raw_fd_ostream &OS) {
uint64_t Pos = OS.tell();
OS.seek(alignTo(Pos, BlockSize));
}
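// For example, a position of 600 is advanced to 1024, the next 512-byte
// boundary.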
// Computes a checksum for a tar header.
static void computeChecksum(UstarHeader &Hdr) {
// Before computing a checksum, the checksum field must be
// filled with space characters.
memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum));
// Compute a checksum and set it to the checksum field.
unsigned Chksum = 0;
for (size_t I = 0; I < sizeof(Hdr); ++I)
Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I];
snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum);
}
// Create a tar header and write it to a given output stream.
static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
// A PAX header consists of a 512-byte header followed
// by key-value strings. First, create key-value strings.
std::string PaxAttr = formatPax("path", Path);
// Create a 512-byte header.
UstarHeader Hdr = {};
snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size());
Hdr.TypeFlag = 'x'; // PAX magic
memcpy(Hdr.Magic, "ustar", 6); // Ustar magic
computeChecksum(Hdr);
// Write them down.
OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
OS << PaxAttr;
pad(OS);
}
// The PAX header is an extended format, so a PAX header needs
// to be followed by a "real" header.
static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
UstarHeader Hdr = {};
memcpy(Hdr.Name, Path.data(), Path.size());
memcpy(Hdr.Mode, "0000664", 8);
snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
memcpy(Hdr.Magic, "ustar", 6);
computeChecksum(Hdr);
OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
}
// We want to use '/' as a path separator even on Windows.
// This function canonicalizes a given path.
static std::string canonicalize(std::string S) {
#ifdef LLVM_ON_WIN32
std::replace(S.begin(), S.end(), '\\', '/');
#endif
return S;
}
// Creates a TarWriter instance and returns it.
Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
StringRef BaseDir) {
int FD;
if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None))
return make_error<StringError>("cannot open " + OutputPath, EC);
return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
}
TarWriter::TarWriter(int FD, StringRef BaseDir)
: OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {}
// Append a given file to an archive.
void TarWriter::append(StringRef Path, StringRef Data) {
// Write Path and Data.
std::string S = BaseDir + "/" + canonicalize(Path) + "\0";
if (S.size() <= sizeof(UstarHeader::Name)) {
writeUstarHeader(OS, S, Data.size());
} else {
writePaxHeader(OS, S);
writeUstarHeader(OS, "", Data.size());
}
OS << Data;
pad(OS);
// POSIX requires tar archives end with two null blocks.
// Here, we write the terminator and then seek back, so that
// the file being output is terminated correctly at any moment.
uint64_t Pos = OS.tell();
OS << std::string(BlockSize * 2, '\0');
OS.seek(Pos);
OS.flush();
}
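A minimal usage sketch for the new class (hypothetical file names; the error
branch is shown because create() returns an llvm::Expected):

  auto Tar = TarWriter::create("out.tar", "base");
  if (!Tar)
    consumeError(Tar.takeError()); // could not open the output file
  else
    (*Tar)->append("foo.txt", "hello\n");

This writes "base/foo.txt" with a plain Ustar header; entries whose full name
exceeds the 100-byte Name field additionally get a PAX header.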


@ -48,7 +48,7 @@
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
#if defined(__GLIBC__) || defined(__APPLE__)
#ifdef __GLIBC__
#include <unwind.h>
#else
#undef HAVE__UNWIND_BACKTRACE

File diff suppressed because it is too large.


@ -11,28 +11,79 @@
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64ISelLowering.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetCallingConv.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "aarch64-lower"
@ -59,7 +110,6 @@ static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
@ -218,7 +268,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@ -3632,6 +3681,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@ -4549,7 +4599,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and take a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
@ -5074,10 +5123,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowBase;
int WindowScale;
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
WindowScale(1) {}
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@ -7028,7 +7078,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp: {
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
@ -7038,9 +7088,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp: {
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
@ -7050,7 +7099,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
}
default:
break;
}
@ -8044,13 +8092,13 @@ static SDValue tryCombineToEXTR(SDNode *N,
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
@ -9732,52 +9780,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT: {
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
} break;
break;
case AArch64CC::LT:
case AArch64CC::GE: {
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
} break;
break;
case AArch64CC::HI:
case AArch64CC::LS: {
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
} break;
break;
case AArch64CC::PL:
case AArch64CC::MI: {
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
} break;
break;
case AArch64CC::LO:
case AArch64CC::HS: {
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
} break;
break;
case AArch64CC::EQ:
case AArch64CC::NE: {
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
} break;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
@ -10501,7 +10548,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
@ -10517,7 +10564,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldxr, Addr),
@ -10527,8 +10574,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(
llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,


@ -14,16 +14,37 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
Is64Bit = 0;
Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
Is64Bit = 1;
Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
Is64Bit = 0;
Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
Is64Bit = 1;
Is64Bit = true;
CC = AArch64CC::NE;
break;
}
@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return Instr.getOpcode();;
return Instr.getOpcode();
case AArch64::ADDWrr: return AArch64::ADDSWrr;
case AArch64::ADDWri: return AArch64::ADDSWri;
@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
}
namespace {
struct UsedNZCV {
bool N;
bool Z;
bool C;
bool V;
UsedNZCV(): N(false), Z(false), C(false), V(false) {}
bool N = false;
bool Z = false;
bool C = false;
bool V = false;
UsedNZCV() = default;
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
@ -1086,6 +1110,7 @@ struct UsedNZCV {
return *this;
}
};
} // end anonymous namespace
/// Find a condition code used by the instruction.
@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const {
ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
if (MI.isCopy()) {
if (MI.isFullCopy()) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
// Handle the case where a copy is being spilled or refilled but the source
// Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
// will be refilled as
// will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
if (MI.isFullCopy() && Ops.size() == 1 &&
if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
bool IsSpill = Ops[0] == 0;
bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
// This is slightly expensive to compute for physical regs since
// getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
const TargetRegisterClass &DstRC = *getRegClass(DstReg);
const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
if (DstRC.getSize() == SrcRC.getSize()) {
if (Ops[0] == 0)
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
"Mismatched register size in non subreg COPY");
if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
&SrcRC, &TRI);
getRegClass(SrcReg), &TRI);
else
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
// Handle cases like spilling def of:
//
// %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
//
// where the physical register source can be widened and stored to the full
// virtual reg destination stack slot, in this case producing:
//
// STRXui %XZR, <fi#0>
//
if (IsSpill && DstMO.isUndef() &&
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
const TargetRegisterClass *SpillRC;
unsigned SpillSubreg;
switch (DstMO.getSubReg()) {
default:
SpillRC = nullptr;
break;
case AArch64::sub_32:
case AArch64::ssub:
if (AArch64::GPR32RegClass.contains(SrcReg)) {
SpillRC = &AArch64::GPR64RegClass;
SpillSubreg = AArch64::sub_32;
} else if (AArch64::FPR32RegClass.contains(SrcReg)) {
SpillRC = &AArch64::FPR64RegClass;
SpillSubreg = AArch64::ssub;
} else
SpillRC = nullptr;
break;
case AArch64::dsub:
if (AArch64::FPR64RegClass.contains(SrcReg)) {
SpillRC = &AArch64::FPR128RegClass;
SpillSubreg = AArch64::dsub;
} else
SpillRC = nullptr;
break;
}
if (SpillRC)
if (unsigned WidenedSrcReg =
TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
FrameIndex, SpillRC, &TRI);
return &*--InsertPt;
}
}
// Handle cases like filling use of:
//
// %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
//
// where we can load the full virtual reg source stack slot, into the subreg
// destination, in this case producing:
//
// LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
//
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
const TargetRegisterClass *FillRC;
switch (DstMO.getSubReg()) {
default:
FillRC = nullptr;
break;
case AArch64::sub_32:
FillRC = &AArch64::GPR32RegClass;
break;
case AArch64::ssub:
FillRC = &AArch64::FPR32RegClass;
break;
case AArch64::dsub:
FillRC = &AArch64::FPR64RegClass;
break;
}
if (FillRC) {
assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
"Mismatched regclass size on folded subreg COPY");
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
MachineInstr &LoadMI = *--InsertPt;
MachineOperand &LoadDst = LoadMI.getOperand(0);
assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
LoadDst.setSubReg(DstMO.getSubReg());
LoadDst.setIsUndef();
return &LoadMI;
}
}
}
// Cannot fold.
@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const {
return true;
}
//
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) {
}
return false;
}
//
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) {
}
return false;
}
//
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
}
return false;
}
//
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
return (Options.UnsafeFPMath ||
Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
//
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
return 0;
return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
return;
}
/// \brief Replace csincr-branch sequence by simple conditional branch
@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"},
{MO_PAGEOFF, "aarch64-pageoff"},
@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},

View File

@ -162,6 +162,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
// This tells target-independent code that it is okay to pass instructions
// with subreg operands to foldMemoryOperandImpl.
bool isSubregFoldable() const override { return true; }
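// Illustrative note, not part of the original commit: with this hook
// returning true, the spill/fill folding code may hand copies such as
//   %vreg0:sub_32<def,read-undef> = COPY %vreg1
// to foldMemoryOperandImpl instead of bailing out whenever a subregister
// index is present on either operand.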
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,

View File

@ -1071,8 +1071,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
(CmpInst::Predicate)I.getOperand(1).getPredicate());
// CSINC increments the result by one when the condition code is false.
// Therefore, we have to invert the predicate to get an increment by 1 when
// the predicate is true.
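// Illustrative example, not in the source: for an i32 "eq" compare, CC is
// "eq" and invCC is "ne", so the CSINC below computes
//   Wd = (ne holds) ? WZR : WZR + 1
// which is 1 exactly when the original "eq" predicate is true.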
const AArch64CC::CondCode invCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
(CmpInst::Predicate)I.getOperand(1).getPredicate()));
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addDef(ZReg)
@ -1084,7 +1088,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(I.getOperand(0).getReg())
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(CC);
.addImm(invCC);
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);

View File

@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
class AArch64InstrInfo;
class AArch64RegisterBankInfo;
class AArch64RegisterInfo;
@ -29,7 +30,7 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
virtual bool select(MachineInstr &I) const override;
bool select(MachineInstr &I) const override;
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
@ -43,5 +44,6 @@ private:
const AArch64RegisterBankInfo &RBI;
};
} // End llvm namespace.
#endif
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H

View File

@ -14,17 +14,18 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
namespace llvm {
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack, this should be a multiple of 16,
/// all usable during a tail call.
@ -34,16 +35,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// space to a function with 16-bytes then misalignment of this value would
/// make a stack adjustment necessary, which could not be undone by the
/// callee.
unsigned BytesInStackArgArea;
unsigned BytesInStackArgArea = 0;
/// The number of bytes to restore to deallocate space for incoming
/// arguments. Canonically 0 in the C calling convention, but non-zero when
/// callee is expected to pop the args.
unsigned ArgumentStackToRestore;
unsigned ArgumentStackToRestore = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
bool HasStackFrame;
bool HasStackFrame = false;
/// \brief Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
@ -53,54 +54,44 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// \brief Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
unsigned NumLocalDynamicTLSAccesses;
unsigned NumLocalDynamicTLSAccesses = 0;
/// \brief FrameIndex for start of varargs area for arguments passed on the
/// stack.
int VarArgsStackIndex;
int VarArgsStackIndex = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
int VarArgsGPRIndex;
int VarArgsGPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in general purpose
/// registers.
unsigned VarArgsGPRSize;
unsigned VarArgsGPRSize = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// floating-point registers.
int VarArgsFPRIndex;
int VarArgsFPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in floating-point
/// registers.
unsigned VarArgsFPRSize;
unsigned VarArgsFPRSize = 0;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
bool IsSplitCSR;
bool IsSplitCSR = false;
/// True when the stack gets realigned dynamically because the size of the
/// stack frame is unknown at compile time, e.g. in the case of VLAs.
bool StackRealigned;
bool StackRealigned = false;
/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
bool CalleeSaveStackHasFreeSpace;
bool CalleeSaveStackHasFreeSpace = false;
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {}
AArch64FunctionInfo() = default;
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {
explicit AArch64FunctionInfo(MachineFunction &MF) {
(void)MF;
}
@ -193,6 +184,7 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
} // End llvm namespace
#endif
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H

View File

@ -71,6 +71,7 @@ void AArch64Subtarget::initializeProperties() {
break;
case Falkor:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
break;
case Kryo:
MaxInterleaveFactor = 4;

View File

@ -15,24 +15,35 @@
#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
#include <string>
using namespace llvm;
static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
@ -154,9 +165,9 @@ extern "C" void LLVMInitializeAArch64Target() {
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
return make_unique<AArch64_MachoTargetObjectFile>();
return llvm::make_unique<AArch64_MachoTargetObjectFile>();
return make_unique<AArch64_ELFTargetObjectFile>();
return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
// Helper function to build a DataLayout string
@ -202,29 +213,35 @@ AArch64TargetMachine::AArch64TargetMachine(
initAsmInfo();
}
AArch64TargetMachine::~AArch64TargetMachine() {}
AArch64TargetMachine::~AArch64TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
struct AArch64GISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
} // End anonymous namespace.
} // end anonymous namespace
#endif
const AArch64Subtarget *
@ -287,6 +304,7 @@ AArch64beTargetMachine::AArch64beTargetMachine(
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
@ -324,7 +342,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
} // namespace
} // end anonymous namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
@ -414,14 +433,17 @@ bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;

View File

@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
}
int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
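// Worked example, added for exposition: with MaxMergeDistance = 64 below,
// an access whose SCEV stride is a known constant of at most 64 bytes is
// assumed mergeable and keeps the cheap base cost, while an unknown or
// larger stride pays NumVectorInstToHideOverhead.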
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
if (Ty->isVectorTy() && IsComplex)
if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction

View File

@ -104,7 +104,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
int getAddressComputationCost(Type *Ty, bool IsComplex);
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);

View File

@ -9,45 +9,62 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
namespace {
class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registered via the .req directive.
StringMap<std::pair<bool, unsigned> > RegisterReqs;
StringMap<std::pair<bool, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@ -118,6 +135,7 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
bool IsILP32;
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI) {
@ -143,9 +161,6 @@ public:
MCSymbolRefExpr::VariantKind &DarwinRefKind,
int64_t &Addend);
};
} // end anonymous namespace
namespace {
/// AArch64Operand - Instances of this class represent a parsed AArch64 machine
/// instruction.
@ -531,6 +546,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 2);
}
bool isImm0_7() const {
if (!isImm())
return false;
@ -540,6 +556,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 8);
}
bool isImm1_8() const {
if (!isImm())
return false;
@ -549,6 +566,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 9);
}
bool isImm0_15() const {
if (!isImm())
return false;
@ -558,6 +576,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 16);
}
bool isImm1_16() const {
if (!isImm())
return false;
@ -567,6 +586,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 17);
}
bool isImm0_31() const {
if (!isImm())
return false;
@ -576,6 +596,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 32);
}
bool isImm1_31() const {
if (!isImm())
return false;
@ -585,6 +606,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 32);
}
bool isImm1_32() const {
if (!isImm())
return false;
@ -594,6 +616,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 33);
}
bool isImm0_63() const {
if (!isImm())
return false;
@ -603,6 +626,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 64);
}
bool isImm1_63() const {
if (!isImm())
return false;
@ -612,6 +636,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 64);
}
bool isImm1_64() const {
if (!isImm())
return false;
@ -621,6 +646,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 65);
}
bool isImm0_127() const {
if (!isImm())
return false;
@ -630,6 +656,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 128);
}
bool isImm0_255() const {
if (!isImm())
return false;
@ -639,6 +666,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 256);
}
bool isImm0_65535() const {
if (!isImm())
return false;
@ -648,6 +676,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 65536);
}
bool isImm32_63() const {
if (!isImm())
return false;
@ -657,6 +686,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 32 && Val < 64);
}
bool isLogicalImm32() const {
if (!isImm())
return false;
@ -669,6 +699,7 @@ public:
Val &= 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64() const {
if (!isImm())
return false;
@ -677,6 +708,7 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
bool isLogicalImm32Not() const {
if (!isImm())
return false;
@ -686,6 +718,7 @@ public:
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64Not() const {
if (!isImm())
return false;
@ -694,7 +727,9 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
}
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
return false;
@ -737,6 +772,7 @@ public:
// code deal with it.
return true;
}
bool isAddSubImmNeg() const {
if (!isShiftedImm() && !isImm())
return false;
@ -756,7 +792,9 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
}
bool isCondCode() const { return Kind == k_CondCode; }
bool isSIMDImmType10() const {
if (!isImm())
return false;
@ -765,6 +803,7 @@ public:
return false;
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
bool isBranchTarget26() const {
if (!isImm())
return false;
@ -776,6 +815,7 @@ public:
return false;
return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
}
bool isPCRelLabel19() const {
if (!isImm())
return false;
@ -787,6 +827,7 @@ public:
return false;
return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
}
bool isBranchTarget14() const {
if (!isImm())
return false;
@ -891,40 +932,49 @@ public:
bool isFPImm() const { return Kind == k_FPImm; }
bool isBarrier() const { return Kind == k_Barrier; }
bool isSysReg() const { return Kind == k_SysReg; }
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MRSReg != -1U;
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MSRReg != -1U;
}
bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::UAO);
}
bool isSystemPStateFieldWithImm0_15() const {
if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
bool isVectorRegLo() const {
return Kind == k_Register && Reg.isVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
bool isGPR32as64() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
bool isWSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
bool isXSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
@ -957,19 +1007,25 @@ public:
bool isVectorIndex1() const {
return Kind == k_VectorIndex && VectorIndex.Val == 1;
}
bool isVectorIndexB() const {
return Kind == k_VectorIndex && VectorIndex.Val < 16;
}
bool isVectorIndexH() const {
return Kind == k_VectorIndex && VectorIndex.Val < 8;
}
bool isVectorIndexS() const {
return Kind == k_VectorIndex && VectorIndex.Val < 4;
}
bool isVectorIndexD() const {
return Kind == k_VectorIndex && VectorIndex.Val < 2;
}
bool isToken() const override { return Kind == k_Token; }
bool isTokenEqual(StringRef Str) const {
return Kind == k_Token && getToken() == Str;
}
@ -1006,6 +1062,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
}
bool isExtendLSL64() const {
if (!isExtend())
return false;
@ -1836,11 +1893,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
case k_PSBHint: {
case k_PSBHint:
OS << getPSBHintName();
break;
}
case k_ShiftExtend: {
case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
if (!hasShiftExtendAmount())
@ -1848,7 +1904,6 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << '>';
break;
}
}
}
/// @name Auto-generated Match Functions
@ -2469,7 +2524,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Expr = MCConstantExpr::create(op2, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
} while (0)
} while (false)
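// Reader's note, not in the source: the do { ... } while (false) wrapper
// makes this macro expand to a single statement, so it nests safely inside
// the if/else chains below; changing the condition from 0 to false is
// purely stylistic.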
if (Mnemonic == "ic") {
if (!Op.compare_lower("ialluis")) {
@ -3979,7 +4034,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@ -4550,7 +4604,6 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
OperandMatchResultTy
AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
@ -4601,7 +4654,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
(isXReg && !XRegClass.contains(SecondReg)) ||
(isWReg && !WRegClass.contains(SecondReg))) {
Error(E,"expected second odd register of a "
@ -4610,7 +4663,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
}
unsigned Pair = 0;
if(isXReg) {
if (isXReg) {
Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
&AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]);
} else {

View File

@ -17,15 +17,12 @@
namespace llvm {
class MCInst;
class raw_ostream;
class AArch64Disassembler : public MCDisassembler {
public:
AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
~AArch64Disassembler() {}
~AArch64Disassembler() override = default;
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
@ -33,6 +30,6 @@ public:
raw_ostream &CStream) const override;
};
} // namespace llvm
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H

View File

@ -17,25 +17,30 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
namespace {
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32);
~AArch64ELFObjectWriter() override;
~AArch64ELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool IsILP32;
private:
};
}
} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian,
@ -44,8 +49,6 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
#define R_CLS(rtype) \
IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\

View File

@ -15,15 +15,23 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@ -37,13 +45,12 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCInstrInfo &MCII;
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: Ctx(ctx), MCII(mcii) {}
~AArch64MCCodeEmitter() override {}
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete;
void operator=(const AArch64MCCodeEmitter &) = delete;
~AArch64MCCodeEmitter() override = default;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@ -181,12 +188,6 @@ private:
} // end anonymous namespace
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned
@ -601,3 +602,9 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new AArch64MCCodeEmitter(MCII, Ctx);
}

View File

@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
using namespace llvm;
//
@ -21,7 +22,7 @@ using namespace llvm;
AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
AArch64TargetStreamer::~AArch64TargetStreamer() {}
AArch64TargetStreamer::~AArch64TargetStreamer() = default;
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.

View File

@ -176,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
MCContext &Context = getObjFileLowering().getContext();
MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
OutStreamer->SwitchSection(ConfigSection);
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
if (!STM.isAmdHsaOS()) {
MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
OutStreamer->SwitchSection(ConfigSection);
}
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);

View File

@ -269,7 +269,7 @@ unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);;
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);

View File

@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
if (Ty->isVectorTy() && IsComplex)
if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction

View File

@ -104,7 +104,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getAddressComputationCost(Type *Val, bool IsComplex);
int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
int getFPOpCost(Type *Ty);

View File

@ -8,23 +8,41 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
#include "LanaiAluCode.h"
#include "LanaiCondCode.h"
#include "MCTargetDesc/LanaiMCExpr.h"
#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
namespace llvm {
// Auto-generated by TableGen
static unsigned MatchRegisterName(StringRef Name);
namespace {
struct LanaiOperand;
class LanaiAsmParser : public MCTargetAsmParser {
@ -80,9 +98,6 @@ private:
const MCSubtargetInfo &SubtargetInfo;
};
// Auto-generated by TableGen
static unsigned MatchRegisterName(llvm::StringRef Name);
// LanaiOperand - Instances of this class represented a parsed machine
// instruction
struct LanaiOperand : public MCParsedAsmOperand {
@ -627,6 +642,8 @@ public:
}
};
} // end anonymous namespace
bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
@ -680,11 +697,11 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() {
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0)
return 0;
return nullptr;
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
return 0;
return nullptr;
}
bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
@ -701,15 +718,15 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *Res, *RHS = 0;
const MCExpr *Res, *RHS = nullptr;
LanaiMCExpr::VariantKind Kind = LanaiMCExpr::VK_Lanai_None;
if (Lexer.getKind() != AsmToken::Identifier)
return 0;
return nullptr;
StringRef Identifier;
if (Parser.parseIdentifier(Identifier))
return 0;
return nullptr;
// Check if identifier has a modifier
if (Identifier.equals_lower("hi"))
@ -722,24 +739,24 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::LParen) {
Error(Lexer.getLoc(), "Expected '('");
return 0;
return nullptr;
}
Lexer.Lex(); // lex '('
// Parse identifier
if (Parser.parseIdentifier(Identifier))
return 0;
return nullptr;
}
// If this is an addition, parse the RHS.
if (Lexer.getKind() == AsmToken::Plus && Parser.parseExpression(RHS))
return 0;
return nullptr;
// For variants parse the final ')'
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::RParen) {
Error(Lexer.getLoc(), "Expected ')'");
return 0;
return nullptr;
}
Lexer.Lex(); // lex ')'
}
@ -771,7 +788,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() {
if (!Parser.parseExpression(ExprVal))
return LanaiOperand::createImm(ExprVal, Start, End);
default:
return 0;
return nullptr;
}
}
@ -1204,10 +1221,9 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "LanaiGenAsmMatcher.inc"
} // namespace
extern "C" void LLVMInitializeLanaiAsmParser() {
RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
} // namespace llvm
} // end namespace llvm

View File

@ -20,14 +20,11 @@
namespace llvm {
class MCInst;
class raw_ostream;
class LanaiDisassembler : public MCDisassembler {
public:
LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx);
~LanaiDisassembler() override {}
~LanaiDisassembler() override = default;
// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
@ -36,6 +33,6 @@ public:
raw_ostream &CStream) const override;
};
} // namespace llvm
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_DISASSEMBLER_LANAIDISASSEMBLER_H

View File

@ -14,10 +14,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
class MCOperand;
class LanaiInstPrinter : public MCInstPrinter {
public:
@ -28,14 +28,14 @@ public:
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = 0);
const char *Modifier = nullptr);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
const char *Modifier = 0);
const char *Modifier = nullptr);
void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
const char *Modifier = 0);
const char *Modifier = nullptr);
void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
const char *Modifier = 0);
const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@ -60,6 +60,7 @@ private:
bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
StringRef Opcode, int AddOffset);
};
} // namespace llvm
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H

View File

@ -11,31 +11,46 @@
//
//===----------------------------------------------------------------------===//
#include "LanaiISelLowering.h"
#include "Lanai.h"
#include "LanaiCondCode.h"
#include "LanaiISelLowering.h"
#include "LanaiMachineFunctionInfo.h"
#include "LanaiSubtarget.h"
#include "LanaiTargetMachine.h"
#include "LanaiTargetObjectFile.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetCallingConv.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <utility>
#define DEBUG_TYPE "lanai-lower"
@ -195,6 +210,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
llvm_unreachable("unimplemented operand");
}
}
//===----------------------------------------------------------------------===//
// Lanai Inline Assembly Support
//===----------------------------------------------------------------------===//
@ -244,7 +260,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = Info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
if (CallOperandVal == nullptr)
return CW_Default;
// Look at the constraint type.
switch (*Constraint) {
@ -270,7 +286,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
void LanaiTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
SDValue Result(nullptr, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
@ -676,7 +692,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
} else {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
if (StackPtr.getNode() == nullptr)
StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
getPointerTy(DAG.getDataLayout()));
@ -1120,7 +1136,7 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
case LanaiISD::SMALL:
return "LanaiISD::SMALL";
default:
return NULL;
return nullptr;
}
}

View File

@ -21,9 +21,6 @@
namespace llvm {
class TargetInstrInfo;
class Type;
struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
LanaiRegisterInfo();
@ -32,7 +29,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
// Code Generation virtual methods.
const uint16_t *
getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@ -42,7 +39,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = NULL) const override;
RegScavenger *RS = nullptr) const override;
bool canRealignStack(const MachineFunction &MF) const override;
@ -58,6 +55,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
};
} // namespace llvm
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAIREGISTERINFO_H

View File

@ -9,20 +9,19 @@
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class LanaiELFObjectWriter : public MCELFObjectTargetWriter {
public:
explicit LanaiELFObjectWriter(uint8_t OSABI);
~LanaiELFObjectWriter() override;
~LanaiELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@ -30,14 +29,13 @@ protected:
bool needsRelocateWithSymbol(const MCSymbol &SD,
unsigned Type) const override;
};
} // namespace
} // end anonymous namespace
LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
/*HasRelocationAddend=*/true) {}
LanaiELFObjectWriter::~LanaiELFObjectWriter() {}
unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
const MCValue & /*Target*/,
const MCFixup &Fixup,

View File

@ -12,37 +12,38 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
#include "LanaiAluCode.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCExpr.h"
#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#define DEBUG_TYPE "mccodeemitter"
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace llvm {
namespace {
class LanaiMCCodeEmitter : public MCCodeEmitter {
LanaiMCCodeEmitter(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &InstrInfo;
MCContext &Context;
public:
LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C)
: InstrInfo(MCII), Context(C) {}
~LanaiMCCodeEmitter() override {}
LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) {}
LanaiMCCodeEmitter(const LanaiMCCodeEmitter &) = delete;
void operator=(const LanaiMCCodeEmitter &) = delete;
~LanaiMCCodeEmitter() override = default;
// The functions below are called by TableGen generated functions for getting
// the binary encoding of instructions/operands.
@ -86,6 +87,8 @@ public:
const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
@ -298,8 +301,8 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
}
#include "LanaiGenMCCodeEmitter.inc"
} // namespace
} // namespace llvm
} // end namespace llvm
llvm::MCCodeEmitter *
llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo,

View File

@ -11,16 +11,21 @@
//
//===----------------------------------------------------------------------===//
#include "LanaiMCTargetDesc.h"
#include "InstPrinter/LanaiInstPrinter.h"
#include "LanaiMCAsmInfo.h"
#include "LanaiMCTargetDesc.h"
#include "InstPrinter/LanaiInstPrinter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
#include <string>
#define GET_INSTRINFO_MC_DESC
#include "LanaiGenInstrInfo.inc"
@ -70,7 +75,7 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new LanaiInstPrinter(MAI, MII, MRI);
return 0;
return nullptr;
}
static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
@ -79,6 +84,7 @@ static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
}
namespace {
class LanaiMCInstrAnalysis : public MCInstrAnalysis {
public:
explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info)
@ -107,6 +113,7 @@ public:
}
}
};
} // end anonymous namespace
static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
@ -131,7 +138,7 @@ extern "C" void LLVMInitializeLanaiTargetMC() {
// Register the MC code emitter
TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(),
llvm::createLanaiMCCodeEmitter);
createLanaiMCCodeEmitter);
// Register the ASM Backend
TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(),

View File

@ -47,7 +47,7 @@ namespace llvm {
FCTIDZ, FCTIWZ,
/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
/// unsigned integers.
/// unsigned integers with round toward zero.
FCTIDUZ, FCTIWUZ,
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in

View File

@ -1154,6 +1154,9 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidu", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;

View File

@ -603,6 +603,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
let FRA = 0;
}
// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>

View File

@ -2172,11 +2172,19 @@ let isCompare = 1, hasSideEffects = 0 in {
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"ftdiv $crD, $fA, $fB", IIC_FPCompare>;
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
"ftsqrt $crD, $fB", IIC_FPCompare>;
let Uses = [RM] in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwu", "$frD, $frB", IIC_FPGeneral,
[]>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;

View File

@ -16985,10 +16985,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
if (Cond.getOpcode() == ISD::SETCC) {
if (Cond.getOpcode() == ISD::SETCC)
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
@ -18289,6 +18288,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
@ -18306,27 +18306,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
const X86Subtarget &Subtarget =
static_cast<const X86Subtarget &>(DAG.getSubtarget());
if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
// Let the shuffle legalizer expand this shift amount node.
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
// +=================+============+=======================================+
// | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
// +=================+============+=======================================+
// | i64 | Yes, No | Use ShAmt as lowest elt |
// | i32 | Yes | zero-extend in-reg |
// | (i32 zext(i16)) | Yes | zero-extend in-reg |
// | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
// +=================+============+=======================================+
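// Worked example, not in the source: an i16 shift amount that was
// zero-extended to i32 on an SSE4.1 target takes the third row above: it
// becomes a v8i16 scalar_to_vector, then a zero-extend-in-reg to v2i64,
// so the low 64 bits of the count (all the packed shift reads) are defined.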
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
SDValue Op0 = ShAmt.getOperand(0);
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64);
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
SmallVector<SDValue, 4> ShOps;
ShOps.push_back(ShAmt);
if (SVT == MVT::i32) {
ShOps.push_back(DAG.getConstant(0, dl, SVT));
ShOps.push_back(DAG.getUNDEF(SVT));
}
ShOps.push_back(DAG.getUNDEF(SVT));
MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
ShAmt = DAG.getBuildVector(BVT, dl, ShOps);
SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
@ -19014,7 +19019,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
Op.getOperand(1), Op.getOperand(2), Subtarget,
DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
@ -21276,7 +21282,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG);
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
@ -25951,12 +25957,11 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
bool FloatDomain = MaskVT.isFloatingPoint() ||
(!Subtarget.hasAVX2() && MaskVT.is256BitVector());
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
@ -26067,11 +26072,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
bool FloatDomain = MaskVT.isFloatingPoint();
bool ContainsZeros = false;
SmallBitVector Zeroable(NumMaskElts, false);
@ -26211,11 +26216,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
SDValue &V1, SDValue &V2,
bool FloatDomain, SDValue &V1, SDValue &V2,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
bool FloatDomain = MaskVT.isFloatingPoint();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@ -26310,13 +26314,13 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool FloatDomain,
SDValue &V1, SDValue &V2,
SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
bool FloatDomain = MaskVT.isFloatingPoint();
// Attempt to match against PALIGNR byte rotate.
if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
@ -26594,8 +26598,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT)) {
if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@ -26609,8 +26613,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@ -26626,8 +26630,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
ShuffleVT, UnaryShuffle)) {
if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
Shuffle, ShuffleVT, UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@ -26643,8 +26647,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
Shuffle, ShuffleVT, PermuteImm)) {
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
DAG, Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@ -28742,6 +28747,27 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (VT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
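// Worked example, added for clarity: extracting at index 8 from a vector
// of i32 elements, rescaled to i64 elements, gives Imm = (8 * 32) / 64 = 4.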
// Op0 needs to be bitcasted to a larger vector with the same element type.
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op0);
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
}
return false;
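// Illustrative sketch (not part of this import): the EXTRACT_SUBVECTOR case
// above rescales the subvector index when the bitcast changes the element
// size, keeping the bit offset of the subvector constant. The helper name
// below is hypothetical.
static unsigned rescaleSubvectorIndex(unsigned OldIdx, unsigned OldEltBits,
unsigned NewEltBits) {
// E.g. index 4 over 32-bit elements becomes index 2 over 64-bit elements:
// (4 * 32) / 64 == 2, i.e. the same bit offset of 128.
return (OldIdx * OldEltBits) / NewEltBits;
}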
@ -30921,6 +30947,59 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
/// Check if truncation with saturation from type \p SrcVT to \p DstVT
/// is valid for the given \p Subtarget.
static bool
isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX512())
return false;
EVT SrcElVT = SrcVT.getScalarType();
EVT DstElVT = DstVT.getScalarType();
if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
return false;
if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
return false;
if (SrcVT.is512BitVector() || Subtarget.hasVLX())
return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
return false;
}
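// Worked example (annotation, not part of the import): with AVX512F alone, a
// 512-bit source such as v16i32 -> v16i8 passes the checks above, since its
// 32-bit source elements need no BWI; v32i16 -> v32i8 additionally requires
// BWI, and any 128/256-bit source additionally requires VLX.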
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched or is unsupported on the current target.
static SDValue
detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) {
if (In.getOpcode() != ISD::UMIN)
return SDValue();
EVT InVT = In.getValueType();
// FIXME: Scalar type may be supported if we move it to vector register.
if (!InVT.isVector() || !InVT.isSimple())
return SDValue();
if (!isSATValidOnSubtarget(InVT, VT, Subtarget))
return SDValue();
// Saturation with truncation. We truncate from InVT to VT.
assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
"Unexpected types for truncate operation");
SDValue SrcVal;
APInt C;
if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C))
SrcVal = In.getOperand(1);
else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C))
SrcVal = In.getOperand(0);
else
return SDValue();
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
// the element size of the destination type.
return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ?
SrcVal : SDValue();
}
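// Scalar model of the pattern matched above (illustrative sketch, not part
// of the import): umin with the destination's unsigned max followed by a
// truncate is a saturating truncate. For an i8 destination the splat
// constant must be (1 << 8) - 1 == 255.
static unsigned char truncUSat8(unsigned int X) {
return (unsigned char)(X < 255u ? X : 255u);
}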
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@ -31487,6 +31566,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
if (SDValue Val =
detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
dl, Val, St->getBasePtr(),
St->getMemoryVT(), St->getMemOperand(), DAG);
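// Annotation (not part of the import): this folds a umin-then-truncate
// store into a single saturating truncating store, e.g. storing
// (v16i8 (trunc (umin v16i32:X, splat(255)))) as one VPMOVUSDB-style store.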
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@ -31967,7 +32052,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
@ -31976,8 +32062,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
}
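// Note on the shifts above (annotation, not part of the import): PACKSS
// packs with signed saturation, so each 32-bit lane is first sign-extended
// from its low 16 bits (shl 16, then sra 16). Saturation then never fires,
// and the pack degenerates into the plain truncation we want.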
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
@ -32046,7 +32134,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
@ -32104,6 +32192,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
// Try the truncation with unsigned saturation.
if (SDValue Val = detectUSatPattern(Src, VT, Subtarget))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val);
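// Annotation (not part of the import): the same detectUSatPattern helper
// that drives the store combine above matches here, so e.g.
// (v16i8 (trunc (umin v16i32:X, splat(255)))) becomes a single
// (v16i8 (X86ISD::VTRUNCUS X)) node.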
// The bitcast source is a direct mmx result.
// Detect bitcasts from i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {

Some files were not shown because too many files have changed in this diff.