From a096e0bdf6cfa020569afca490d8e4c9ac8ebb01 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:23:48 +0000
Subject: [PATCH 1/6] Vendor import of llvm release_60 branch r323338:
 https://llvm.org/svn/llvm-project/llvm/branches/release_60@323338

---
 cmake/modules/LLVMConfig.cmake.in             |   2 +
 docs/ReleaseNotes.rst                         |  56 ++++-
 include/llvm/Analysis/RegionInfoImpl.h        |  12 +-
 .../CodeGen/SelectionDAGAddressAnalysis.h     |   2 +-
 include/llvm/MC/MCCodeView.h                  |  48 +---
 .../llvm/Support/GenericDomTreeConstruction.h |  66 ++++--
 .../llvm/Transforms/Vectorize/SLPVectorizer.h |   7 +-
 lib/CodeGen/CodeGenPrepare.cpp                |   7 +-
 lib/CodeGen/GlobalMerge.cpp                   |   3 +-
 lib/CodeGen/PeepholeOptimizer.cpp             |  41 ++--
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp      | 116 +++++----
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      |   4 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp     |   7 +-
 .../SelectionDAGAddressAnalysis.cpp           |  21 +-
 lib/CodeGen/TargetLoweringBase.cpp            |  15 +-
 lib/Linker/IRMover.cpp                        |   7 +-
 lib/MC/MCCodeView.cpp                         |  69 ++++++
 .../AArch64/AArch64InstructionSelector.cpp    |  34 +++
 lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp |  20 --
 lib/Target/PowerPC/PPCISelLowering.cpp        |  43 ++++
 lib/Target/PowerPC/PPCISelLowering.h          |   6 +
 lib/Target/PowerPC/PPCInstrInfo.td            |  12 +
 lib/Target/X86/AsmParser/X86AsmParser.cpp     |   7 +
 lib/Target/X86/X86ISelLowering.cpp            |  62 +++--
 lib/Target/X86/X86TargetTransformInfo.cpp     |   3 +-
 lib/Transforms/Scalar/GVNHoist.cpp            |   2 +-
 lib/Transforms/Scalar/StructurizeCFG.cpp      | 108 +++------
 lib/Transforms/Vectorize/LoopVectorize.cpp    |   9 +-
 lib/Transforms/Vectorize/SLPVectorizer.cpp    |  61 +----
 .../GlobalISel/select-gv-cmodel-large.mir     |  61 +++++
 test/CodeGen/AArch64/atomic-ops-lse.ll        |  47 +++-
 test/CodeGen/AMDGPU/multilevel-break.ll       |   5 +-
 test/CodeGen/AMDGPU/nested-loop-conditions.ll | 133 +++++++----
 test/CodeGen/ARM/and-load-combine.ll          |  14 +-
 test/CodeGen/ARM/atomic-cmpxchg.ll            |   3 +-
 test/CodeGen/ARM/cmpxchg-O0.ll                |   6 +-
 test/CodeGen/ARM/global-merge-dllexport.ll    |  15 ++
 test/CodeGen/ARM/global-merge-external.ll     |  29 ++-
 test/CodeGen/ARM/peephole-phi.mir             |  67 ++++++
 test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll   |  94 ++++++++
 test/CodeGen/PowerPC/atomics-regression.ll    |  40 ++++
 .../X86/avx512-shuffles/partial_permute.ll    |  39 ++++
 test/CodeGen/X86/darwin-bzero.ll              |   9 +-
 test/CodeGen/X86/inline-asm-A-constraint.ll   |   3 +-
 test/CodeGen/X86/pr35761.ll                   |  36 +++
 test/CodeGen/X86/pr35972.ll                   |  20 ++
 test/CodeGen/X86/pr37563.ll                   |  42 ++++
 test/CodeGen/X86/var-permute-128.ll           |   5 +-
 test/CodeGen/X86/var-permute-256.ll           | 180 ++++++++++++++
 test/MC/COFF/cv-inline-linetable.s            |  26 +++
 test/MC/X86/x86-64.s                          |  38 ++-
 .../X86/Inputs/dicompositetype-unique2.ll     |  46 ++++
 test/ThinLTO/X86/dicompositetype-unique2.ll   |  69 ++++++
 .../X86/sink-addrmode-select.ll               |  19 ++
 .../Transforms/GVNHoist/pr35222-hoist-load.ll |  45 ++++
 test/Transforms/JumpThreading/ddt-crash3.ll   |  43 ++++
 test/Transforms/JumpThreading/ddt-crash4.ll   |  75 ++++++
 test/Transforms/LoopVectorize/pr35773.ll      |  53 +++++
 .../Transforms/SLPVectorizer/X86/PR35628_1.ll |  74 ++++++
 .../Transforms/SLPVectorizer/X86/PR35628_2.ll |  64 +++++
 test/Transforms/SLPVectorizer/X86/PR35777.ll  |  48 ++++
 test/Transforms/SLPVectorizer/X86/PR35865.ll  |  27 +++
 .../X86/insert-element-build-vector.ll        | 220 +++++++++---------
 .../SLPVectorizer/X86/insertvalue.ll          | 162 +++++++++++--
 .../Transforms/SLPVectorizer/X86/value-bug.ll |  48 +++-
 .../AMDGPU/backedge-id-bug-xfail.ll           |  77 ++++++
 .../StructurizeCFG/AMDGPU/backedge-id-bug.ll  | 163 +++++++++++++
 .../StructurizeCFG/AMDGPU/lit.local.cfg       |   2 +
 .../StructurizeCFG/nested-loop-order.ll       |  83 ++++---
 .../tools/llvm-readobj/macho-needed-libs.test |  26 +++
 tools/llvm-readobj/MachODumper.cpp            |  30 +++
 .../IR/DominatorTreeBatchUpdatesTest.cpp      |  95 ++++++++
 unittests/IR/DominatorTreeTest.cpp            |  25 ++
 utils/release/test-release.sh                 |  10 +-
 74 files changed, 2673 insertions(+), 593 deletions(-)
 create mode 100644 test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
 create mode 100644 test/CodeGen/ARM/global-merge-dllexport.ll
 create mode 100644 test/CodeGen/ARM/peephole-phi.mir
 create mode 100644 test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
 create mode 100644 test/CodeGen/X86/pr35761.ll
 create mode 100644 test/CodeGen/X86/pr35972.ll
 create mode 100644 test/CodeGen/X86/pr37563.ll
 create mode 100644 test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll
 create mode 100644 test/ThinLTO/X86/dicompositetype-unique2.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
 create mode 100644 test/Transforms/JumpThreading/ddt-crash3.ll
 create mode 100644 test/Transforms/JumpThreading/ddt-crash4.ll
 create mode 100644 test/Transforms/LoopVectorize/pr35773.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/PR35628_1.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/PR35628_2.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/PR35777.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/PR35865.ll
 create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
 create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
 create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
 create mode 100644 test/tools/llvm-readobj/macho-needed-libs.test

diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in
index 077201691656..fe4df5278498 100644
--- a/cmake/modules/LLVMConfig.cmake.in
+++ b/cmake/modules/LLVMConfig.cmake.in
@@ -37,6 +37,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
 
 set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
 
+set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@)
+
 set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@)
 
 set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 41b9cf92d767..8ef9f6b86c51 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -54,6 +54,8 @@ Non-comprehensive list of changes in this release
   ``DIVariables`` to the instructions in a ``Module``. The ``CheckDebugify``
   pass determines how much of the metadata is lost.
 
+* Significantly improved quality of CodeView debug info for Windows.
+
 * Note..
 
 .. NOTE
@@ -69,10 +71,13 @@ Non-comprehensive list of changes in this release
 Changes to the LLVM IR
 ----------------------
 
-Changes to the ARM Backend
---------------------------
+Changes to the ARM Target
+-------------------------
 
- During this release ...
+During this release the ARM target has:
+
+* Got support for enabling SjLj exception handling on platforms where it
+  isn't the default.
 
 
 Changes to the MIPS Target
@@ -89,7 +94,10 @@ Changes to the PowerPC Target
 Changes to the X86 Target
 -------------------------
 
- During this release ...
+During this release ...
+
+* Got support for enabling SjLj exception handling on platforms where it
+  isn't the default.
 
 Changes to the AMDGPU Target
 -----------------------------
@@ -116,8 +124,46 @@ Changes to the C API
 External Open Source Projects Using LLVM 6
 ==========================================
 
-* A project...
+JFS - JIT Fuzzing Solver
+------------------------
 
+`JFS <https://github.com/delcypher/jfs>`_ is an experimental constraint solver
+designed to investigate using coverage guided fuzzing as an incomplete strategy
+for solving boolean, BitVector, and floating-point constraints.
+It is built on top of LLVM, Clang, LibFuzzer, and Z3.
+
+The solver works by generating a C++ program where the reachability of an
+`abort()` statement is equivalent to finding a satisfying assignment to the
+constraints. This program is then compiled by Clang with `SanitizerCoverage
+<https://releases.llvm.org/6.0.0/tools/clang/docs/SanitizerCoverage.html>`_
+instrumentation and then fuzzed using :doc:`LibFuzzer <LibFuzzer>`.
+
+Zig Programming Language
+------------------------
+
+`Zig <http://ziglang.org>`_  is an open-source programming language designed
+for robustness, optimality, and clarity. It is intended to replace C. It
+provides high level features such as Generics,
+Compile Time Function Execution, and Partial Evaluation, yet exposes low level
+LLVM IR features such as Aliases. Zig uses Clang to provide automatic
+import of .h symbols - even inline functions and macros. Zig uses LLD combined
+with lazily building compiler-rt to provide out-of-the-box cross-compiling for
+all supported targets.
+
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
+and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
+are underway.
 
 Additional Information
 ======================
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index 6e522354dd9b..eb6baac2d5e4 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -254,23 +254,23 @@ std::string RegionBase<Tr>::getNameStr() const {
 template <class Tr>
 void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
   if (!contains(BB))
-    llvm_unreachable("Broken region found: enumerated BB not in region!");
+    report_fatal_error("Broken region found: enumerated BB not in region!");
 
   BlockT *entry = getEntry(), *exit = getExit();
 
   for (BlockT *Succ :
        make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) {
     if (!contains(Succ) && exit != Succ)
-      llvm_unreachable("Broken region found: edges leaving the region must go "
-                       "to the exit node!");
+      report_fatal_error("Broken region found: edges leaving the region must go "
+                         "to the exit node!");
   }
 
   if (entry != BB) {
     for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
                                    InvBlockTraits::child_end(BB))) {
       if (!contains(Pred))
-        llvm_unreachable("Broken region found: edges entering the region must "
-                         "go to the entry node!");
+        report_fatal_error("Broken region found: edges entering the region must "
+                           "go to the entry node!");
     }
   }
 }
@@ -557,7 +557,7 @@ void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
     } else {
       BlockT *BB = Element->template getNodeAs<BlockT>();
       if (getRegionFor(BB) != R)
-        llvm_unreachable("BB map does not match region nesting");
+        report_fatal_error("BB map does not match region nesting");
     }
   }
 }
diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 18e4c7a83def..580606441a9d 100644
--- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -56,7 +56,7 @@ class BaseIndexOffset {
                       int64_t &Off);
 
   /// Parses tree in Ptr for base, index, offset addresses.
-  static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG);
+  static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h
index e2249f49c86c..c8f14515ed34 100644
--- a/include/llvm/MC/MCCodeView.h
+++ b/include/llvm/MC/MCCodeView.h
@@ -177,13 +177,7 @@ class CodeViewContext {
                                unsigned IACol);
 
   /// Retreive the function info if this is a valid function id, or nullptr.
-  MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) {
-    if (FuncId >= Functions.size())
-      return nullptr;
-    if (Functions[FuncId].isUnallocatedFunctionInfo())
-      return nullptr;
-    return &Functions[FuncId];
-  }
+  MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId);
 
   /// Saves the information from the currently parsed .cv_loc directive
   /// and sets CVLocSeen.  When the next instruction is assembled an entry
@@ -199,50 +193,22 @@ class CodeViewContext {
     CurrentCVLoc.setIsStmt(IsStmt);
     CVLocSeen = true;
   }
-  void clearCVLocSeen() { CVLocSeen = false; }
 
   bool getCVLocSeen() { return CVLocSeen; }
+  void clearCVLocSeen() { CVLocSeen = false; }
+
   const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
 
   bool isValidCVFileNumber(unsigned FileNumber);
 
   /// \brief Add a line entry.
-  void addLineEntry(const MCCVLineEntry &LineEntry) {
-    size_t Offset = MCCVLines.size();
-    auto I = MCCVLineStartStop.insert(
-        {LineEntry.getFunctionId(), {Offset, Offset + 1}});
-    if (!I.second)
-      I.first->second.second = Offset + 1;
-    MCCVLines.push_back(LineEntry);
-  }
+  void addLineEntry(const MCCVLineEntry &LineEntry);
 
-  std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId) {
-    std::vector<MCCVLineEntry> FilteredLines;
+  std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId);
 
-    auto I = MCCVLineStartStop.find(FuncId);
-    if (I != MCCVLineStartStop.end())
-      for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
-           ++Idx)
-        if (MCCVLines[Idx].getFunctionId() == FuncId)
-          FilteredLines.push_back(MCCVLines[Idx]);
-    return FilteredLines;
-  }
+  std::pair<size_t, size_t> getLineExtent(unsigned FuncId);
 
-  std::pair<size_t, size_t> getLineExtent(unsigned FuncId) {
-    auto I = MCCVLineStartStop.find(FuncId);
-    // Return an empty extent if there are no cv_locs for this function id.
-    if (I == MCCVLineStartStop.end())
-      return {~0ULL, 0};
-    return I->second;
-  }
-
-  ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R) {
-    if (R <= L)
-      return None;
-    if (L >= MCCVLines.size())
-      return None;
-    return makeArrayRef(&MCCVLines[L], R - L);
-  }
+  ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R);
 
   /// Emits a line table substream.
   void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId,
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index 8f801662d0fb..25175fe66aa8 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -628,7 +628,7 @@ struct SemiNCAInfo {
         DecreasingLevel>
         Bucket;  // Queue of tree nodes sorted by level in descending order.
     SmallDenseSet<TreeNodePtr, 8> Affected;
-    SmallDenseSet<TreeNodePtr, 8> Visited;
+    SmallDenseMap<TreeNodePtr, unsigned, 8> Visited;
     SmallVector<TreeNodePtr, 8> AffectedQueue;
     SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
   };
@@ -706,7 +706,7 @@ struct SemiNCAInfo {
       // algorithm does not really know or use the set of roots and can make a
       // different (implicit) decision about which nodes within an infinite loop
       // becomes a root.
-      if (DT.isVirtualRoot(TN->getIDom())) {
+      if (TN && !DT.isVirtualRoot(TN->getIDom())) {
         DEBUG(dbgs() << "Root " << BlockNamePrinter(R)
                      << " is not virtual root's child\n"
                      << "The entire tree needs to be rebuilt\n");
@@ -753,14 +753,16 @@ struct SemiNCAInfo {
 
     while (!II.Bucket.empty()) {
       const TreeNodePtr CurrentNode = II.Bucket.top().second;
+      const unsigned  CurrentLevel = CurrentNode->getLevel();
       II.Bucket.pop();
       DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
                    << BlockNamePrinter(CurrentNode) << "\n");
-      II.Visited.insert(CurrentNode);
+
+      II.Visited.insert({CurrentNode, CurrentLevel});
       II.AffectedQueue.push_back(CurrentNode);
 
       // Discover and collect affected successors of the current node.
-      VisitInsertion(DT, BUI, CurrentNode, CurrentNode->getLevel(), NCD, II);
+      VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II);
     }
 
     // Finish by updating immediate dominators and levels.
@@ -772,13 +774,17 @@ struct SemiNCAInfo {
                              const TreeNodePtr TN, const unsigned RootLevel,
                              const TreeNodePtr NCD, InsertionInfo &II) {
     const unsigned NCDLevel = NCD->getLevel();
-    DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n");
+    DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ",  RootLevel "
+                 << RootLevel << "\n");
 
     SmallVector<TreeNodePtr, 8> Stack = {TN};
     assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
 
+    SmallPtrSet<TreeNodePtr, 8> Processed;
+
     do {
       TreeNodePtr Next = Stack.pop_back_val();
+      DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n");
 
       for (const NodePtr Succ :
            ChildrenGetter<IsPostDom>::Get(Next->getBlock(), BUI)) {
@@ -786,19 +792,31 @@ struct SemiNCAInfo {
         assert(SuccTN && "Unreachable successor found at reachable insertion");
         const unsigned SuccLevel = SuccTN->getLevel();
 
-        DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
-                     << ", level = " << SuccLevel << "\n");
+        DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = "
+                     << SuccLevel << "\n");
+
+        // Do not process the same node multiple times.
+        if (Processed.count(Next) > 0)
+          continue;
 
         // Succ dominated by subtree From -- not affected.
         // (Based on the lemma 2.5 from the second paper.)
         if (SuccLevel > RootLevel) {
           DEBUG(dbgs() << "\t\tDominated by subtree From\n");
-          if (II.Visited.count(SuccTN) != 0)
-            continue;
+          if (II.Visited.count(SuccTN) != 0) {
+            DEBUG(dbgs() << "\t\t\talready visited at level "
+                         << II.Visited[SuccTN] << "\n\t\t\tcurrent level "
+                         << RootLevel << ")\n");
+
+            // A node can be necessary to visit again if we see it again at
+            // a lower level than before.
+            if (II.Visited[SuccTN] >= RootLevel)
+              continue;
+          }
 
           DEBUG(dbgs() << "\t\tMarking visited not affected "
                        << BlockNamePrinter(Succ) << "\n");
-          II.Visited.insert(SuccTN);
+          II.Visited.insert({SuccTN, RootLevel});
           II.VisitedNotAffectedQueue.push_back(SuccTN);
           Stack.push_back(SuccTN);
         } else if ((SuccLevel > NCDLevel + 1) &&
@@ -809,6 +827,8 @@ struct SemiNCAInfo {
           II.Bucket.push({SuccLevel, SuccTN});
         }
       }
+
+      Processed.insert(Next);
     } while (!Stack.empty());
   }
 
@@ -920,21 +940,21 @@ struct SemiNCAInfo {
     const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To);
     const TreeNodePtr NCD = DT.getNode(NCDBlock);
 
-    // To dominates From -- nothing to do.
-    if (ToTN == NCD) return;
+    // If To dominates From -- nothing to do.
+    if (ToTN != NCD) {
+      DT.DFSInfoValid = false;
 
-    DT.DFSInfoValid = false;
+      const TreeNodePtr ToIDom = ToTN->getIDom();
+      DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
+                   << BlockNamePrinter(ToIDom) << "\n");
 
-    const TreeNodePtr ToIDom = ToTN->getIDom();
-    DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
-                 << BlockNamePrinter(ToIDom) << "\n");
-
-    // To remains reachable after deletion.
-    // (Based on the caption under Figure 4. from the second paper.)
-    if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
-      DeleteReachable(DT, BUI, FromTN, ToTN);
-    else
-      DeleteUnreachable(DT, BUI, ToTN);
+      // To remains reachable after deletion.
+      // (Based on the caption under Figure 4. from the second paper.)
+      if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
+        DeleteReachable(DT, BUI, FromTN, ToTN);
+      else
+        DeleteUnreachable(DT, BUI, ToTN);
+    }
 
     if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
   }
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 25f264c4722c..781a628a0974 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -95,14 +95,9 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
 
   /// \brief Try to vectorize a list of operands.
-  /// \@param BuildVector A list of users to ignore for the purpose of
-  ///                     scheduling and cost estimation when NeedExtraction
-  ///                     is false.
   /// \returns true if a value was vectorized.
   bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
-                          ArrayRef<Value *> BuildVector = None,
-                          bool AllowReorder = false,
-                          bool NeedExtraction = false);
+                          bool AllowReorder = false);
 
   /// \brief Try to vectorize a chain that may start at the operands of \p I.
   bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 9dc1ab4e6bb5..26ca8d4ee88c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -2700,8 +2700,13 @@ class AddressingModeCombiner {
     // we still need to collect it due to original value is different.
     // And later we will need all original values as anchors during
     // finding the common Phi node.
+    // We also must reject the case when base offset is different and
+    // scale reg is not null, we cannot handle this case due to merge of
+    // different offsets will be used as ScaleReg.
     if (DifferentField != ExtAddrMode::MultipleFields &&
-        DifferentField != ExtAddrMode::ScaleField) {
+        DifferentField != ExtAddrMode::ScaleField &&
+        (DifferentField != ExtAddrMode::BaseOffsField ||
+         !NewAddrMode.ScaledReg)) {
       AddrModes.emplace_back(NewAddrMode);
       return true;
     }
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 8b9545da914e..3888226fa059 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) {
   for (auto &GV : M.globals()) {
     // Merge is safe for "normal" internal or external globals only
     if (GV.isDeclaration() || GV.isThreadLocal() ||
-        GV.hasSection() || GV.hasImplicitSection())
+        GV.hasSection() || GV.hasImplicitSection() ||
+        GV.hasDLLExportStorageClass())
       continue;
 
     // It's not safe to merge globals that may be preempted
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 45078081987a..11acbe687a31 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
     CurSrcPair = Pair;
     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
                             !DisableAdvCopyOpt, TII);
-    ValueTrackerResult Res;
-    bool ShouldRewrite = false;
 
-    do {
-      // Follow the chain of copies until we reach the top of the use-def chain
-      // or find a more suitable source.
-      Res = ValTracker.getNextSource();
+    // Follow the chain of copies until we find a more suitable source, a phi
+    // or have to abort.
+    while (true) {
+      ValueTrackerResult Res = ValTracker.getNextSource();
+      // Abort at the end of a chain (without finding a suitable source).
       if (!Res.isValid())
-        break;
+        return false;
 
       // Insert the Def -> Use entry for the recently found source.
       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
@@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
       if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
         return false;
 
+      // Keep following the chain if the value isn't any better yet.
       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
-      ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
-                                                CurSrcPair.SubReg);
-    } while (!ShouldRewrite);
+      if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg))
+        continue;
 
-    // Continue looking for new sources...
-    if (Res.isValid())
-      continue;
+      // We currently cannot deal with subreg operands on PHI instructions
+      // (see insertPHI()).
+      if (PHICount > 0 && CurSrcPair.SubReg != 0)
+        continue;
 
-    // Do not continue searching for a new source if the there's at least
-    // one use-def which cannot be rewritten.
-    if (!ShouldRewrite)
-      return false;
-  }
-
-  if (PHICount >= RewritePHILimit) {
-    DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
-    return false;
+      // We found a suitable source, and are done with this chain.
+      break;
+    }
   }
 
   // If we did not find a more suitable source, there is nothing to optimize.
@@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
 
   const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+  // NewRC is only correct if no subregisters are involved. findNextSource()
+  // should have rejected those cases already.
+  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
   unsigned NewVR = MRI->createVirtualRegister(NewRC);
   MachineBasicBlock *MBB = OrigPHI->getParent();
   MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 81bff4d7eefa..2c6b724c02df 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
       EVT ExtVT;
       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
           isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
-        // Only add this load if we can make it more narrow.
-        if (ExtVT.bitsLT(Load->getMemoryVT()))
+
+        // ZEXTLOAD is already small enough.
+        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+            ExtVT.bitsGE(Load->getMemoryVT()))
+          continue;
+
+        // Use LE to convert equal sized loads to zext.
+        if (ExtVT.bitsLE(Load->getMemoryVT()))
           Loads.insert(Load);
+
         continue;
       }
       return false;
@@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
     if (Loads.size() == 0)
       return false;
 
+    DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
     SDValue MaskOp = N->getOperand(1);
 
     // If it exists, fixup the single node we allow in the tree that needs
     // masking.
     if (FixupNode) {
+      DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                 FixupNode->getValueType(0),
                                 SDValue(FixupNode, 0), MaskOp);
@@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
 
     // Narrow any constants that need it.
     for (auto *LogicN : NodesWithConsts) {
-      auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
-      SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
-                                SDValue(C, 0), MaskOp);
-      DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+      SDValue Op0 = LogicN->getOperand(0);
+      SDValue Op1 = LogicN->getOperand(1);
+
+      if (isa<ConstantSDNode>(Op0))
+          std::swap(Op0, Op1);
+
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+                                Op1, MaskOp);
+
+      DAG.UpdateNodeOperands(LogicN, Op0, And);
     }
 
     // Create narrow loads.
     for (auto *Load : Loads) {
+      DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                 SDValue(Load, 0), MaskOp);
       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
@@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
       return SDValue();
 
     // Loads must share the same base address
-    BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
     int64_t ByteOffsetFromBase = 0;
     if (!Base)
       Base = Ptr;
@@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates(
     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
-  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   EVT MemVT = St->getMemoryVT();
 
   SDValue Val = peekThroughBitcast(St->getValue());
@@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates(
   EVT LoadVT;
   if (IsLoadSrc) {
     auto *Ld = cast<LoadSDNode>(Val);
-    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+    LBasePtr = BaseIndexOffset::match(Ld, DAG);
     LoadVT = Ld->getMemoryVT();
     // Load and store should be the same type.
     if (MemVT != LoadVT)
@@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates(
         return false;
       // The Load's Base Ptr must also match
       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
-        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
         if (LoadVT != OtherLd->getMemoryVT())
           return false;
         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
@@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates(
           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
         return false;
     }
-    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+    Ptr = BaseIndexOffset::match(Other, DAG);
     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
 
@@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       if (Ld->getMemoryVT() != MemVT)
         break;
 
-      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
       // If this is not the first ptr that we check.
       int64_t LdOffset = 0;
       if (LdBasePtr.getBase().getNode()) {
@@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
 
   // Check for BaseIndexOffset matching.
-  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
-  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
   int64_t PtrDiff;
-  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
-    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
+    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
 
-  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
-  // able to calculate their relative offset if at least one arises
-  // from an alloca. However, these allocas cannot overlap and we
-  // can infer there is no alias.
-  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
-    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
-      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-      // If the base are the same frame index but the we couldn't find a
-      // constant offset, (indices are different) be conservative.
-      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
-                     !MFI.isFixedObjectIndex(B->getIndex())))
-        return false;
-    }
+    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+    // able to calculate their relative offset if at least one arises
+    // from an alloca. However, these allocas cannot overlap and we
+    // can infer there is no alias.
+    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+        // If the base are the same frame index but the we couldn't find a
+        // constant offset, (indices are different) be conservative.
+        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                       !MFI.isFixedObjectIndex(B->getIndex())))
+          return false;
+      }
 
-  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
-  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
-  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
-  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
-  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
-  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
 
-  // If of mismatched base types or checkable indices we can check
-  // they do not alias.
-  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
-       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
-      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
-    return false;
+    // If of mismatched base types or checkable indices we can check
+    // they do not alias.
+    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+      return false;
+  }
 
-  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
-  // compared to the size and offset of the access, we may be able to prove they
-  // do not alias. This check is conservative for now to catch cases created by
-  // splitting vector types.
+  // If we know required SrcValue1 and SrcValue2 have relatively large
+  // alignment compared to the size and offset of the access, we may be able
+  // to prove they do not alias. This check is conservative for now to catch
+  // cases created by splitting vector types.
   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
@@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
 
-    // There is no overlap between these relatively aligned accesses of similar
-    // size. Return no alias.
+    // There is no overlap between these relatively aligned accesses of
+    // similar size. Return no alias.
     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
         (OffAlign1 + NumBytes1) <= OffAlign0)
       return false;
@@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
 
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
-  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
 
   // We must have a base and an offset.
   if (!BasePtr.getBase().getNode())
@@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
       break;
 
     // Find the base pointer and offset for this memory node.
-    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
 
     // Check that the base pointer is the same as the original one.
     if (!BasePtr.equalBaseIndex(Ptr, DAG))
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb1dc17b7a1b..b566c232cbc3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     case ISD::ZERO_EXTEND:
       LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
                         DAG.getValueType(AtomicType));
-      RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+      RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
       ExtRes = LHS;
       break;
     case ISD::ANY_EXTEND:
       LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
-      RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+      RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
       break;
     default:
       llvm_unreachable("Invalid atomic op extension");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f239..3ffc6fa9a059 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 
-  SDValue Loc = LD->getOperand(1);
-  SDValue BaseLoc = Base->getOperand(1);
-
-  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
-  auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+  auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+  auto LocDecomp = BaseIndexOffset::match(LD, *this);
 
   int64_t Offset = 0;
   if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d5980919d03c..da1574f60524 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -21,6 +21,9 @@ using namespace llvm;
 
 bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
                                      const SelectionDAG &DAG, int64_t &Off) {
+  // Conservatively fail if we a match failed..
+  if (!Base.getNode() || !Other.Base.getNode())
+    return false;
   // Initial Offset difference.
   Off = Other.Offset - Offset;
 
@@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
+BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+                                       const SelectionDAG &DAG) {
+  SDValue Ptr = N->getBasePtr();
+
   // (((B + I*M) + c)) + c ...
   SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
   SDValue Index = SDValue();
   int64_t Offset = 0;
   bool IsIndexSignExt = false;
 
+  // pre-inc/pre-dec ops are components of EA.
+  if (N->getAddressingMode() == ISD::PRE_INC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset += C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset -= C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  }
+
   // Consume constant adds & ors with appropriate masking.
   while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
     if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 224ae1a3236a..b29a33ac1c14 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
     setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
 
-    // Darwin 10 and higher has an optimized __bzero.
-    if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
-      setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
+    // Some darwins have an optimized __bzero/bzero function.
+    switch (TT.getArch()) {
+    case Triple::x86:
+    case Triple::x86_64:
+      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
+        setLibcallName(RTLIB::BZERO, "__bzero");
+      break;
+    case Triple::aarch64:
+      setLibcallName(RTLIB::BZERO, "bzero");
+      break;
+    default:
+      break;
     }
 
     if (darwinHasSinCos(TT)) {
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index ee067a912e3c..f7170e714b9b 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -954,7 +954,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
     NewGV->setLinkage(GlobalValue::InternalLinkage);
 
   Constant *C = NewGV;
-  if (DGV)
+  // Only create a bitcast if necessary. In particular, with
+  // DebugTypeODRUniquing we may reach metadata in the destination module
+  // containing a GV from the source module, in which case SGV will be
+  // the same as DGV and NewGV, and TypeMap.get() will assert since it
+  // assumes it is being invoked on a type in the source module.
+  if (DGV && NewGV != SGV)
     C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
 
   if (DGV && NewGV != DGV) {
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 82b81ccc24da..5fd5bde9f1eb 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber,
   return true;
 }
 
+MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) {
+  if (FuncId >= Functions.size())
+    return nullptr;
+  if (Functions[FuncId].isUnallocatedFunctionInfo())
+    return nullptr;
+  return &Functions[FuncId];
+}
+
 bool CodeViewContext::recordFunctionId(unsigned FuncId) {
   if (FuncId >= Functions.size())
     Functions.resize(FuncId + 1);
@@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
   OS.EmitValueImpl(SRE, 4);
 }
 
+void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
+  size_t Offset = MCCVLines.size();
+  auto I = MCCVLineStartStop.insert(
+      {LineEntry.getFunctionId(), {Offset, Offset + 1}});
+  if (!I.second)
+    I.first->second.second = Offset + 1;
+  MCCVLines.push_back(LineEntry);
+}
+
+std::vector<MCCVLineEntry>
+CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
+  std::vector<MCCVLineEntry> FilteredLines;
+  auto I = MCCVLineStartStop.find(FuncId);
+  if (I != MCCVLineStartStop.end()) {
+    MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
+    for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
+         ++Idx) {
+      unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
+      if (LocationFuncId == FuncId) {
+        // This was a .cv_loc directly for FuncId, so record it.
+        FilteredLines.push_back(MCCVLines[Idx]);
+      } else {
+        // Check if the current location is inlined in this function. If it is,
+        // synthesize a statement .cv_loc at the original inlined call site.
+        auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
+        if (I != SiteInfo->InlinedAtMap.end()) {
+          MCCVFunctionInfo::LineInfo &IA = I->second;
+          // Only add the location if it differs from the previous location.
+          // Large inlined calls will have many .cv_loc entries and we only need
+          // one line table entry in the parent function.
+          if (FilteredLines.empty() ||
+              FilteredLines.back().getFileNum() != IA.File ||
+              FilteredLines.back().getLine() != IA.Line ||
+              FilteredLines.back().getColumn() != IA.Col) {
+            FilteredLines.push_back(MCCVLineEntry(
+                MCCVLines[Idx].getLabel(),
+                MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
+          }
+        }
+      }
+    }
+  }
+  return FilteredLines;
+}
+
+std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
+  auto I = MCCVLineStartStop.find(FuncId);
+  // Return an empty extent if there are no cv_locs for this function id.
+  if (I == MCCVLineStartStop.end())
+    return {~0ULL, 0};
+  return I->second;
+}
+
+ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
+  if (R <= L)
+    return None;
+  if (L >= MCCVLines.size())
+    return None;
+  return makeArrayRef(&MCCVLines[L], R - L);
+}
+
 void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
                                                unsigned FuncId,
                                                const MCSymbol *FuncBegin,
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index c2d3ae31c624..b85b4e082996 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     if (OpFlags & AArch64II::MO_GOT) {
       I.setDesc(TII.get(AArch64::LOADgot));
       I.getOperand(1).setTargetFlags(OpFlags);
+    } else if (TM.getCodeModel() == CodeModel::Large) {
+      // Materialize the global using movz/movk instructions.
+      unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+      auto InsertPt = std::next(I.getIterator());
+      auto MovZ =
+          BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
+              .addDef(MovZDstReg);
+      MovZ->addOperand(MF, I.getOperand(1));
+      MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+                                         AArch64II::MO_NC);
+      MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+      constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+      auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
+                           unsigned Offset, unsigned ForceDstReg) {
+        unsigned DstReg =
+            ForceDstReg ? ForceDstReg
+                        : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+        auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
+                            TII.get(AArch64::MOVKXi))
+                        .addDef(DstReg)
+                        .addReg(SrcReg);
+        MovI->addOperand(MF, MachineOperand::CreateGA(
+                                 GV, MovZ->getOperand(1).getOffset(), Flags));
+        MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+        constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+        return DstReg;
+      };
+      unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+                                  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+      DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+      BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+      I.eraseFromParent();
+      return true;
     } else {
       I.setDesc(TII.get(AArch64::MOVaddr));
       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 740861851185..f08c50540656 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -821,7 +821,6 @@ namespace llvm {
                 MutableArrayRef<int> NewMask, unsigned Options = None);
     OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
                 MutableArrayRef<int> NewMask);
-    OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
     OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
                 ResultStack &Results);
     OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
@@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
   return concat(Out[0], Out[1], Results);
 }
 
-OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
-  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
-
-  int VecLen = SM.Mask.size();
-  SmallVector<uint8_t,128> UsedBytes(VecLen);
-  bool HasUnused = false;
-  for (int I = 0; I != VecLen; ++I) {
-    if (SM.Mask[I] != -1)
-      UsedBytes[I] = 0xFF;
-    else
-      HasUnused = true;
-  }
-  if (!HasUnused)
-    return Va;
-  SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
-  Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
-  return OpRef::res(Results.top());
-}
-
 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
                          ResultStack &Results) {
   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f9de65fcb1df..f0e8b11a3d9c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 
+  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::Hi:              return "PPCISD::Hi";
   case PPCISD::Lo:              return "PPCISD::Lo";
   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
+  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
+  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
@@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
   return Op;
 }
 
+// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
+// compared to a value that is atomically loaded (atomic loads zero-extend).
+SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
+         "Expecting an atomic compare-and-swap here.");
+  SDLoc dl(Op);
+  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
+  EVT MemVT = AtomicNode->getMemoryVT();
+  if (MemVT.getSizeInBits() >= 32)
+    return Op;
+
+  SDValue CmpOp = Op.getOperand(2);
+  // If this is already correctly zero-extended, leave it alone.
+  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
+  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
+    return Op;
+
+  // Clear the high bits of the compare operand.
+  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
+  SDValue NewCmpOp =
+    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
+                DAG.getConstant(MaskVal, dl, MVT::i32));
+
+  // Replace the existing compare operand with the properly zero-extended one.
+  SmallVector<SDValue, 4> Ops;
+  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
+    Ops.push_back(AtomicNode->getOperand(i));
+  Ops[2] = NewCmpOp;
+  MachineMemOperand *MMO = AtomicNode->getMemOperand();
+  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
+  auto NodeTy =
+    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
+  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
+}
+
 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerREM(Op, DAG);
   case ISD::BSWAP:
     return LowerBSWAP(Op, DAG);
+  case ISD::ATOMIC_CMP_SWAP:
+    return LowerATOMIC_CMP_SWAP(Op, DAG);
   }
 }
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b119e5b4a564..b3215a84829e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -430,6 +430,11 @@ namespace llvm {
       /// The 4xf32 load used for v4i1 constants.
       QVLFSb,
 
+      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+      /// except they ensure that the compare input is zero-extended for
+      /// sub-word versions because the atomic loads zero-extend.
+      ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
       /// GPRC = TOC_ENTRY GA, TOC
       /// Loads the entry for GA from the TOC, where the TOC base is given by
       /// the last operand.
@@ -955,6 +960,7 @@ namespace llvm {
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a932d05b24ee..43dcc4479cf0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -257,6 +257,13 @@ def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
 def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
                            [SDNPHasChain, SDNPOptInGlue]>;
 
+// PPC-specific atomic operations.
+def PPCatomicCmpSwap_8 :
+  SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
+         [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def PPCatomicCmpSwap_16 :
+  SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
+         [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 def PPClbrx       : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
                            [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
   }
 }
 
+def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
+        (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
+def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
+        (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
+
 // Instructions to support atomic operations
 let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
 def LBARX : XForm_1<31,  52, (outs gprc:$rD), (ins memrr:$src),
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1ce430f3323..f2ffba7d5418 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
     Flags |= Prefix;
+    if (getLexer().is(AsmToken::EndOfStatement)) {
+      // We don't have real instr with the given prefix
+      //  let's use the prefix as the instr.
+      // TODO: there could be several prefixes one after another
+      Flags = X86::IP_NO_PREFIX;
+      break;
+    }
     Name = Parser.getTok().getString();
     Parser.Lex(); // eat the prefix
     // Hack: we could have something like "rep # some comment" or
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6f56877bd64..e7d9334abe14 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
     IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                                  VT.getVectorNumElements());
   IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
-  return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
-                     SDLoc(V), VT, IndicesVec, SrcVec);
+  if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
+    SrcVec =
+        DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
+                    SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
+  }
+  if (VT == MVT::v16i8)
+    return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
+  return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
 }
 
 SDValue
@@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
   }
 
+  // For v64i1 without 64-bit support we need to split and rejoin.
+  if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+    assert(Subtarget.hasBWI() && "Expected BWI to be legal");
+    SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
+    SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
+    SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
+    SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
+    SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
+    SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+  }
+
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
     SDValue Op1Scalar;
     if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
@@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       }
     }
 
+    SDValue NewV1 = V1; // Save operand in case early exit happens.
     if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                                V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
-                                ShuffleVT) &&
+                                NewV1, DL, DAG, Subtarget, Shuffle,
+                                ShuffleSrcVT, ShuffleVT) &&
         (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return SDValue(); // Nothing to do!
-      Res = DAG.getBitcast(ShuffleSrcVT, V1);
+      Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
       DCI.AddToWorklist(Res.getNode());
       Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
       DCI.AddToWorklist(Res.getNode());
@@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     }
   }
 
+  SDValue NewV1 = V1; // Save operands in case early exit happens.
+  SDValue NewV2 = V2;
   if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                               V1, V2, DL, DAG, Subtarget, Shuffle,
+                               NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
                                ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
-    V1 = DAG.getBitcast(ShuffleSrcVT, V1);
-    DCI.AddToWorklist(V1.getNode());
-    V2 = DAG.getBitcast(ShuffleSrcVT, V2);
-    DCI.AddToWorklist(V2.getNode());
-    Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
+    NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
+    DCI.AddToWorklist(NewV1.getNode());
+    NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+    DCI.AddToWorklist(NewV2.getNode());
+    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
     DCI.AddToWorklist(Res.getNode());
     return DAG.getBitcast(RootVT, Res);
   }
 
-  if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
-                                      AllowIntDomain, V1, V2, DL, DAG,
-                                      Subtarget, Shuffle, ShuffleVT,
-                                      PermuteImm) &&
+  NewV1 = V1; // Save operands in case early exit happens.
+  NewV2 = V2;
+  if (matchBinaryPermuteVectorShuffle(
+          MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
+          NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
-    V1 = DAG.getBitcast(ShuffleVT, V1);
-    DCI.AddToWorklist(V1.getNode());
-    V2 = DAG.getBitcast(ShuffleVT, V2);
-    DCI.AddToWorklist(V2.getNode());
-    Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+    NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
+    DCI.AddToWorklist(NewV1.getNode());
+    NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+    DCI.AddToWorklist(NewV2.getNode());
+    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
                       DAG.getConstant(PermuteImm, DL, MVT::i8));
     DCI.AddToWorklist(Res.getNode());
     return DAG.getBitcast(RootVT, Res);
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 223eed3048db..967d67a84bc0 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   // type remains the same.
   if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
     MVT LegalVT = LT.second;
-    if (LegalVT.getVectorElementType().getSizeInBits() ==
+    if (LegalVT.isVector() &&
+        LegalVT.getVectorElementType().getSizeInBits() ==
             Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
         LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
 
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index c0cd1ea74a74..026fab5dbd3b 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -648,7 +648,7 @@ class GVNHoist {
           // track in a CHI. In the PDom walk, there can be values in the
           // stack which are not control dependent e.g., nested loop.
           if (si != RenameStack.end() && si->second.size() &&
-              DT->dominates(Pred, si->second.back()->getParent())) {
+              DT->properlyDominates(Pred, si->second.back()->getParent())) {
             C.Dest = BB;                     // Assign the edge
             C.I = si->second.pop_back_val(); // Assign the argument
             DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index b8fb80b6cc26..525425bd0f0c 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -14,7 +14,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
@@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass {
   Region *ParentRegion;
 
   DominatorTree *DT;
-  LoopInfo *LI;
 
-  SmallVector<RegionNode *, 8> Order;
+  std::deque<RegionNode *> Order;
   BBSet Visited;
 
   BBPhiMap DeletedPhis;
@@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass {
 
   void gatherPredicates(RegionNode *N);
 
-  void collectInfos();
+  void analyzeNode(RegionNode *N);
 
   void insertConditions(bool Loops);
 
@@ -258,7 +256,6 @@ class StructurizeCFG : public RegionPass {
       AU.addRequired<DivergenceAnalysis>();
     AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
     RegionPass::getAnalysisUsage(AU);
@@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
 
 /// \brief Build up the general order of nodes
 void StructurizeCFG::orderNodes() {
-  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
-  SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
+  assert(Visited.empty());
+  assert(Predicates.empty());
+  assert(Loops.empty());
+  assert(LoopPreds.empty());
 
-  // The reverse post-order traversal of the list gives us an ordering close
-  // to what we want.  The only problem with it is that sometimes backedges
-  // for outer loops will be visited before backedges for inner loops.
-  for (RegionNode *RN : RPOT) {
-    BasicBlock *BB = RN->getEntry();
-    Loop *Loop = LI->getLoopFor(BB);
-    ++LoopBlocks[Loop];
+  // This must be RPO order for the back edge detection to work
+  for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
+    // FIXME: Is there a better order to use for structurization?
+    Order.push_back(RN);
+    analyzeNode(RN);
   }
-
-  unsigned CurrentLoopDepth = 0;
-  Loop *CurrentLoop = nullptr;
-  for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
-    BasicBlock *BB = (*I)->getEntry();
-    unsigned LoopDepth = LI->getLoopDepth(BB);
-
-    if (is_contained(Order, *I))
-      continue;
-
-    if (LoopDepth < CurrentLoopDepth) {
-      // Make sure we have visited all blocks in this loop before moving back to
-      // the outer loop.
-
-      auto LoopI = I;
-      while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
-        LoopI++;
-        BasicBlock *LoopBB = (*LoopI)->getEntry();
-        if (LI->getLoopFor(LoopBB) == CurrentLoop) {
-          --BlockCount;
-          Order.push_back(*LoopI);
-        }
-      }
-    }
-
-    CurrentLoop = LI->getLoopFor(BB);
-    if (CurrentLoop)
-      LoopBlocks[CurrentLoop]--;
-
-    CurrentLoopDepth = LoopDepth;
-    Order.push_back(*I);
-  }
-
-  // This pass originally used a post-order traversal and then operated on
-  // the list in reverse. Now that we are using a reverse post-order traversal
-  // rather than re-working the whole pass to operate on the list in order,
-  // we just reverse the list and continue to operate on it in reverse.
-  std::reverse(Order.begin(), Order.end());
 }
 
 /// \brief Determine the end of the loops
@@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 }
 
 /// \brief Collect various loop and predicate infos
-void StructurizeCFG::collectInfos() {
-  // Reset predicate
-  Predicates.clear();
+void StructurizeCFG::analyzeNode(RegionNode *RN) {
+  DEBUG(dbgs() << "Visiting: "
+        << (RN->isSubRegion() ? "SubRegion with entry: " : "")
+        << RN->getEntry()->getName() << '\n');
 
-  // and loop infos
-  Loops.clear();
-  LoopPreds.clear();
+  // Analyze all the conditions leading to a node
+  gatherPredicates(RN);
 
-  // Reset the visited nodes
-  Visited.clear();
+  // Remember that we've seen this node
+  Visited.insert(RN->getEntry());
 
-  for (RegionNode *RN : reverse(Order)) {
-    DEBUG(dbgs() << "Visiting: "
-                 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
-                 << RN->getEntry()->getName() << " Loop Depth: "
-                 << LI->getLoopDepth(RN->getEntry()) << "\n");
-
-    // Analyze all the conditions leading to a node
-    gatherPredicates(RN);
-
-    // Remember that we've seen this node
-    Visited.insert(RN->getEntry());
-
-    // Find the last back edges
-    analyzeLoops(RN);
-  }
+  // Find the last back edges
+  analyzeLoops(RN);
 }
 
 /// \brief Insert the missing branch conditions
@@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
   LLVMContext &Context = Func->getContext();
   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-                       Order.back()->getEntry();
+                       Order.front()->getEntry();
   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
                                         Func, Insert);
   DT->addNewBlock(Flow, Dominator);
@@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
 /// Take one node from the order vector and wire it up
 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
                               BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.pop_back_val();
+  RegionNode *Node = Order.front();
+  Order.pop_front();
   Visited.insert(Node->getEntry());
 
   if (isPredictableTrue(Node)) {
@@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
 
     PrevNode = Node;
     while (!Order.empty() && !Visited.count(LoopEnd) &&
-           dominatesPredicates(Entry, Order.back())) {
+           dominatesPredicates(Entry, Order.front())) {
       handleLoops(false, LoopEnd);
     }
 
@@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
 
 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
                                  BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.back();
+  RegionNode *Node = Order.front();
   BasicBlock *LoopStart = Node->getEntry();
 
   if (!Loops.count(LoopStart)) {
@@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
   ParentRegion = R;
 
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
   orderNodes();
-  collectInfos();
+
   createFlow();
   insertConditions(false);
   insertConditions(true);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ef54385c452..64f206ea92eb 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
   Instruction *LastInduction = VecInd;
   for (unsigned Part = 0; Part < UF; ++Part) {
     VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
-    recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
     if (isa<TruncInst>(EntryVal))
       addMetadata(LastInduction, EntryVal);
+    else
+      recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
     LastInduction = cast<Instruction>(addFastMathFlag(
         Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
   }
@@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
 
   // If we haven't yet vectorized the induction variable, splat the scalar
   // induction variable, and build the necessary step vectors.
+  // TODO: Don't do it unless the vectorized IV is really required.
   if (!VectorizedIV) {
     Value *Broadcasted = getBroadcastInstrs(ScalarIV);
     for (unsigned Part = 0; Part < UF; ++Part) {
       Value *EntryPart =
           getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
       VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
-      recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
       if (Trunc)
         addMetadata(EntryPart, Trunc);
+      else
+        recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
     }
   }
 
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a7ccd3faec44..f301fc361abc 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
         DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
               Lane << " from " << *Scalar << ".\n");
         ExternalUses.emplace_back(Scalar, nullptr, Lane);
-        continue;
       }
       for (User *U : Scalar->users()) {
         DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
   if (!A || !B)
     return false;
   Value *VL[] = { A, B };
-  return tryToVectorizeList(VL, R, None, true);
+  return tryToVectorizeList(VL, R, true);
 }
 
 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
-                                           ArrayRef<Value *> BuildVector,
-                                           bool AllowReorder,
-                                           bool NeedExtraction) {
+                                           bool AllowReorder) {
   if (VL.size() < 2)
     return false;
 
@@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                    << "\n");
       ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
 
-      ArrayRef<Value *> EmptyArray;
-      ArrayRef<Value *> BuildVectorSlice;
-      if (!BuildVector.empty())
-        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
-      R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
+      R.buildTree(Ops);
       // TODO: check if we can allow reordering for more cases.
       if (AllowReorder && R.shouldReorder()) {
         // Conceptually, there is nothing actually preventing us from trying to
@@ -4530,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
         // reductions. However, at this point, we only expect to get here when
         // there are exactly two operations.
         assert(Ops.size() == 2);
-        assert(BuildVectorSlice.empty());
         Value *ReorderedOps[] = {Ops[1], Ops[0]};
         R.buildTree(ReorderedOps, None);
       }
@@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                  << " and with tree size "
                                  << ore::NV("TreeSize", R.getTreeSize()));
 
-        Value *VectorizedRoot = R.vectorizeTree();
-
-        // Reconstruct the build vector by extracting the vectorized root. This
-        // way we handle the case where some elements of the vector are
-        // undefined.
-        //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-        if (!BuildVectorSlice.empty()) {
-          // The insert point is the last build vector instruction. The
-          // vectorized root will precede it. This guarantees that we get an
-          // instruction. The vectorized tree could have been constant folded.
-          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-          unsigned VecIdx = 0;
-          for (auto &V : BuildVectorSlice) {
-            IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                        ++BasicBlock::iterator(InsertAfter));
-            Instruction *I = cast<Instruction>(V);
-            assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-            Instruction *Extract =
-                cast<Instruction>(Builder.CreateExtractElement(
-                    VectorizedRoot, Builder.getInt32(VecIdx++)));
-            I->setOperand(1, Extract);
-            I->moveAfter(Extract);
-            InsertAfter = I;
-          }
-        }
+        R.vectorizeTree();
         // Move to the next bundle.
         I += VF - 1;
         NextInst = I + 1;
@@ -5495,11 +5462,9 @@ class HorizontalReduction {
 ///
 /// Returns true if it matches
 static bool findBuildVector(InsertElementInst *LastInsertElem,
-                            SmallVectorImpl<Value *> &BuildVector,
                             SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V = nullptr;
   do {
-    BuildVector.push_back(LastInsertElem);
     BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
     V = LastInsertElem->getOperand(0);
     if (isa<UndefValue>(V))
@@ -5508,7 +5473,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
     if (!LastInsertElem || !LastInsertElem->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
 ///
 /// \return true if it matches.
 static bool findBuildAggregate(InsertValueInst *IV,
-                               SmallVectorImpl<Value *> &BuildVector,
                                SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V;
   do {
-    BuildVector.push_back(IV);
     BuildVectorOpds.push_back(IV->getInsertedValueOperand());
     V = IV->getAggregateOperand();
     if (isa<UndefValue>(V))
@@ -5530,7 +5492,6 @@ static bool findBuildAggregate(InsertValueInst *IV,
     if (!IV || !IV->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
   if (!R.canMapToVector(IVI->getType(), DL))
     return false;
 
-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
+  if (!findBuildAggregate(IVI, BuildVectorOpds))
     return false;
 
   DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
   // Aggregate value is unlikely to be processed in vector register, we need to
   // extract scalars into scalar registers, so NeedExtraction is set true.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }
 
 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
                                                    BasicBlock *BB, BoUpSLP &R) {
-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
+  if (!findBuildVector(IEI, BuildVectorOpds))
     return false;
 
   // Vectorize starting with the build vector operands ignoring the BuildVector
   // instructions for the purpose of scheduling and user extraction.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }
 
 bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       // is done when there are exactly two elements since tryToVectorizeList
       // asserts that there are only two values when AllowReorder is true.
       bool AllowReorder = NumElts == 2;
-      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
-                                            None, AllowReorder)) {
+      if (NumElts > 1 &&
+          tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
         // Success start over because instructions might have been changed.
         HaveVectorizedPhiNodes = true;
         Changed = true;
diff --git a/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
new file mode 100644
index 000000000000..12cd832665b3
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+  @foo1 = common global [1073741824 x i32] zeroinitializer, align 4
+  @foo2 = common global [1073741824 x i32] zeroinitializer, align 4
+
+  define i32 @gv_large() {
+  entry:
+    %retval = alloca i32, align 4
+    store i32 0, i32* %retval, align 4
+    %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4
+    %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4
+    %add = add nsw i32 %0, %1
+    ret i32 %add
+  }
+
+...
+---
+name:            gv_large
+legalized:       true
+regBankSelected: true
+stack:
+  - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: gv_large
+    ; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0
+    ; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16
+    ; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32
+    ; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]]
+    ; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0
+    ; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16
+    ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32
+    ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]]
+    ; CHECK: STRWui %wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval)
+    ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
+    ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]]
+    ; CHECK: %w0 = COPY [[ADDWrr]]
+    ; CHECK: RET_ReallyLR implicit %w0
+    %1:gpr(s32) = G_CONSTANT i32 0
+    %4:gpr(p0) = G_GLOBAL_VALUE @foo1
+    %3:gpr(p0) = COPY %4(p0)
+    %7:gpr(p0) = G_GLOBAL_VALUE @foo2
+    %6:gpr(p0) = COPY %7(p0)
+    %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval
+    G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval)
+    %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
+    %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    %8:gpr(s32) = G_ADD %2, %5
+    %w0 = COPY %8(s32)
+    RET_ReallyLR implicit %w0
+
+...
diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll
index 49f716547b12..1a5cd2dc4233 100644
--- a/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -629,14 +629,29 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-
-; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
+; CHECK-NEXT: ret
 
    ret i8 %old
 }
 
+define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
+   %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+   %success = extractvalue { i8, i1 } %pair, 1
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+
+; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+   ret i1 %success
+}
+
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
    %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
@@ -644,14 +659,30 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-
-; CHECK: casah w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
+; CHECK-NEXT: ret
 
    ret i16 %old
 }
 
+define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
+   %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
+   %success = extractvalue { i16, i1 } %pair, 1
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+
+; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp w[[NEW]], w0, uxth
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+
+   ret i1 %success
+}
+
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
    %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire
diff --git a/test/CodeGen/AMDGPU/multilevel-break.ll b/test/CodeGen/AMDGPU/multilevel-break.ll
index 8cc02d497098..5b556f12f0d6 100644
--- a/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -66,9 +66,10 @@ ENDIF:                                            ; preds = %LOOP
 
 ; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
 ; OPT: llvm.amdgcn.break
+; OPT: llvm.amdgcn.break
+; OPT: llvm.amdgcn.if.break
+; OPT: llvm.amdgcn.if.break
 ; OPT: llvm.amdgcn.loop
-; OPT: llvm.amdgcn.if.break
-; OPT: llvm.amdgcn.if.break
 ; OPT: llvm.amdgcn.end.cf
 
 ; GCN-LABEL: {{^}}multi_if_break_loop:
diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index 672549c8ea63..96d2841e685f 100644
--- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -124,55 +124,100 @@ bb23:                                             ; preds = %bb10
 ; Earlier version of above, before a run of the structurizer.
 ; IR-LABEL: @nested_loop_conditions(
 
-; IR: Flow7:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17)
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
-; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
-; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
-; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8
+; IR: %tmp1235 = icmp slt i32 %tmp1134, 9
+; IR:   br i1 %tmp1235, label %bb14.lr.ph, label %Flow
 
-; IR: Flow1:
-; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
-; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
-; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
-; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
-; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
-; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17)
-; IR-NEXT: br i1 %18, label %Flow7, label %bb14
-
-; IR: Flow2:
-; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
-; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
-; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
-; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
-; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
-; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
-; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0
-; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1
-; IR-NEXT: br i1 %25, label %bb21, label %Flow3
-
-; IR: bb21:
-; IR: %tmp12 = icmp slt i32 %tmp11, 9
-; IR-NEXT: %27 = xor i1 %tmp12, true
-; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
-; IR-NEXT: br label %Flow3
+; IR: bb14.lr.ph:
+; IR: br label %bb14
 
 ; IR: Flow3:
-; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
-; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
-; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
-; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
-; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
-; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
+; IR:   call void @llvm.amdgcn.end.cf(i64 %18)
+; IR:   %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17)
+; IR:   %1 = extractvalue { i1, i64 } %0, 0
+; IR:   %2 = extractvalue { i1, i64 } %0, 1
+; IR:   br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
+
+; IR: bb4.bb13_crit_edge:
+; IR:   br label %Flow4
+
+; IR: Flow4:
+; IR:   %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ]
+; IR:   call void @llvm.amdgcn.end.cf(i64 %2)
+; IR:   br label %Flow
+
+; IR: bb13:
+; IR:   br label %bb31
+
+; IR: Flow:
+; IR:   %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ]
+; IR:   %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4)
+; IR:   %6 = extractvalue { i1, i64 } %5, 0
+; IR:   %7 = extractvalue { i1, i64 } %5, 1
+; IR:   br i1 %6, label %bb13, label %bb31
+
+; IR: bb14:
+; IR:   %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ]
+; IR:   %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ]
+; IR:   %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ]
+; IR:   %tmp15 = icmp eq i32 %tmp1037, 1
+; IR:   %8 = xor i1 %tmp15, true
+; IR:   %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
+; IR:   %10 = extractvalue { i1, i64 } %9, 0
+; IR:   %11 = extractvalue { i1, i64 } %9, 1
+; IR:   br i1 %10, label %bb31.loopexit, label %Flow1
+
+; IR: Flow1:
+; IR:   %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11)
+; IR:   %13 = extractvalue { i1, i64 } %12, 0
+; IR:   %14 = extractvalue { i1, i64 } %12, 1
+; IR:   br i1 %13, label %bb16, label %Flow2
+
+; IR: bb16:
+; IR:   %tmp17 = bitcast i64 %tmp3 to <2 x i32>
+; IR:   br label %bb18
+
+; IR: Flow2:
+; IR:   %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ]
+; IR:   %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ]
+; IR:   %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ]
+; IR:   %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ]
+; IR:   %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi)
+; IR:   call void @llvm.amdgcn.end.cf(i64 %14)
+; IR:   %19 = call i1 @llvm.amdgcn.loop(i64 %18)
+; IR:   br i1 %19, label %Flow3, label %bb14
+
+; IR: bb18:
+; IR:   %tmp19 = load volatile i32, i32 addrspace(1)* undef
+; IR:   %tmp20 = icmp slt i32 %tmp19, 9
+; IR:   br i1 %tmp20, label %bb21, label %bb18
+
+; IR: bb21:
+; IR:   %tmp22 = extractelement <2 x i32> %tmp17, i64 1
+; IR:   %tmp23 = lshr i32 %tmp22, 16
+; IR:   %tmp24 = select i1 undef, i32 undef, i32 %tmp23
+; IR:   %tmp25 = uitofp i32 %tmp24 to float
+; IR:   %tmp26 = fmul float %tmp25, 0x3EF0001000000000
+; IR:   %tmp27 = fsub float %tmp26, undef
+; IR:   %tmp28 = fcmp olt float %tmp27, 5.000000e-01
+; IR:   %tmp29 = select i1 %tmp28, i64 1, i64 2
+; IR:   %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29
+; IR:   %tmp7 = zext i32 %tmp30 to i64
+; IR:   %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7
+; IR:   %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16
+; IR:   %tmp10 = extractelement <4 x i32> %tmp9, i64 0
+; IR:   %tmp11 = load volatile i32, i32 addrspace(1)* undef
+; IR:   %tmp12 = icmp slt i32 %tmp11, 9
+; IR:   %20 = xor i1 %tmp12, true
+; IR:   %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken)
+; IR:   br label %Flow2
+
+; IR: bb31.loopexit:
+; IR:   br label %Flow1
 
 ; IR: bb31:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7)
-; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
-; IR-NEXT: ret void
+; IR:   call void @llvm.amdgcn.end.cf(i64 %7)
+; IR:   store volatile i32 0, i32 addrspace(1)* undef
+; IR:   ret void
 
 
 ; GCN-LABEL: {{^}}nested_loop_conditions:
diff --git a/test/CodeGen/ARM/and-load-combine.ll b/test/CodeGen/ARM/and-load-combine.ll
index 2b92778f469d..69b00ed4853a 100644
--- a/test/CodeGen/ARM/and-load-combine.ll
+++ b/test/CodeGen/ARM/and-load-combine.ll
@@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
 ; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    ldrb r0, [r0]
 ; ARM-NEXT:    uxtb r2, r2
-; ARM-NEXT:    and r0, r0, r1
-; ARM-NEXT:    uxtb r1, r0
+; ARM-NEXT:    and r1, r0, r1
 ; ARM-NEXT:    mov r0, #0
 ; ARM-NEXT:    cmp r1, r2
 ; ARM-NEXT:    movweq r0, #1
@@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
 ; ARMEB:       @ %bb.0: @ %entry
 ; ARMEB-NEXT:    ldrb r0, [r0]
 ; ARMEB-NEXT:    uxtb r2, r2
-; ARMEB-NEXT:    and r0, r0, r1
-; ARMEB-NEXT:    uxtb r1, r0
+; ARMEB-NEXT:    and r1, r0, r1
 ; ARMEB-NEXT:    mov r0, #0
 ; ARMEB-NEXT:    cmp r1, r2
 ; ARMEB-NEXT:    movweq r0, #1
@@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
 ;
 ; THUMB1-LABEL: test6:
 ; THUMB1:       @ %bb.0: @ %entry
-; THUMB1-NEXT:    ldrb r0, [r0]
-; THUMB1-NEXT:    ands r0, r1
-; THUMB1-NEXT:    uxtb r3, r0
+; THUMB1-NEXT:    ldrb r3, [r0]
+; THUMB1-NEXT:    ands r3, r1
 ; THUMB1-NEXT:    uxtb r2, r2
 ; THUMB1-NEXT:    movs r0, #1
 ; THUMB1-NEXT:    movs r1, #0
@@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
 ; THUMB2:       @ %bb.0: @ %entry
 ; THUMB2-NEXT:    ldrb r0, [r0]
 ; THUMB2-NEXT:    uxtb r2, r2
-; THUMB2-NEXT:    ands r0, r1
-; THUMB2-NEXT:    uxtb r1, r0
+; THUMB2-NEXT:    ands r1, r0
 ; THUMB2-NEXT:    movs r0, #0
 ; THUMB2-NEXT:    cmp r1, r2
 ; THUMB2-NEXT:    it eq
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index a136e44fc196..fec116677085 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -49,9 +49,10 @@ entry:
 ; CHECK-THUMBV6:       mov [[EXPECTED:r[0-9]+]], r1
 ; CHECK-THUMBV6-NEXT:  bl __sync_val_compare_and_swap_1
 ; CHECK-THUMBV6-NEXT:  mov [[RES:r[0-9]+]], r0
+; CHECK-THUMBV6-NEXT:  uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]]
 ; CHECK-THUMBV6-NEXT:  movs r0, #1
 ; CHECK-THUMBV6-NEXT:  movs [[ZERO:r[0-9]+]], #0
-; CHECK-THUMBV6-NEXT:  cmp [[RES]], [[EXPECTED]]
+; CHECK-THUMBV6-NEXT:  cmp [[RES]], [[EXPECTED_ZEXT]]
 ; CHECK-THUMBV6-NEXT:  beq [[END:.LBB[0-9_]+]]
 ; CHECK-THUMBV6-NEXT:  mov r0, [[ZERO]]
 ; CHECK-THUMBV6-NEXT: [[END]]:
diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll
index f8ad2bbbbe0e..b49378d6702e 100644
--- a/test/CodeGen/ARM/cmpxchg-O0.ll
+++ b/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
-; CHECK:     cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK:     uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
+; CHECK:     cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
 ; CHECK:     {{moveq|movweq}} {{r[0-9]+}}, #1
 ; CHECK:     dmb ish
   %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
@@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
-; CHECK:     cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK:     uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
+; CHECK:     cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
 ; CHECK:     {{moveq|movweq}} {{r[0-9]+}}, #1
 ; CHECK:     dmb ish
   %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
diff --git a/test/CodeGen/ARM/global-merge-dllexport.ll b/test/CodeGen/ARM/global-merge-dllexport.ll
new file mode 100644
index 000000000000..680f57d0a17b
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge-dllexport.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s
+
+@x = global i32 0, align 4
+@y = dllexport global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK: f1:
+; CHECK: movw [[REG1:r[0-9]+]], :lower16:x
+; CHECK: movt [[REG1]], :upper16:x
+  store i32 %a1, i32* @x, align 4
+  store i32 %a2, i32* @y, align 4
+  ret void
+}
+
+; CHECK-NOT: .L_MergedGlobals
diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll
index 03c977614320..25bbd0869581 100644
--- a/test/CodeGen/ARM/global-merge-external.ll
+++ b/test/CodeGen/ARM/global-merge-external.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge                                 | FileCheck %s --check-prefix=CHECK-MERGE
-; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -global-merge-on-external=true  | FileCheck %s --check-prefix=CHECK-MERGE
-; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE
-; RUN: llc < %s -mtriple=arm-macho -arm-global-merge                                 | FileCheck %s --check-prefix=CHECK-NO-MERGE
-; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -relocation-model=pic           | FileCheck %s --check-prefix=CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge                                 | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
+; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -global-merge-on-external=true  | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
+; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=arm-macho -arm-global-merge                                 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=arm-eabi  -arm-global-merge -relocation-model=pic           | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge                             | FileCheck %s --check-prefixes=CHECK-WIN32
 
 @x = global i32 0, align 4
 @y = global i32 0, align 4
@@ -10,10 +11,13 @@
 
 define void @f1(i32 %a1, i32 %a2) {
 ;CHECK:          f1:
-;CHECK:          ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]]
+;CHECK:          ldr {{r[0-9]+}}, [[LABEL1:\.?LCPI[0-9]+_[0-9]]]
 ;CHECK:          [[LABEL1]]:
 ;CHECK-MERGE:    .long .L_MergedGlobals
 ;CHECK-NO-MERGE: .long {{_?x}}
+;CHECK-WIN32:    f1:
+;CHECK-WIN32:    movw [[REG1:r[0-9]+]], :lower16:.L_MergedGlobals
+;CHECK-WIN32:    movt [[REG1]], :upper16:.L_MergedGlobals
   store i32 %a1, i32* @x, align 4
   store i32 %a2, i32* @y, align 4
   ret void
@@ -21,10 +25,13 @@ define void @f1(i32 %a1, i32 %a2) {
 
 define void @g1(i32 %a1, i32 %a2) {
 ;CHECK:          g1:
-;CHECK:          ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]]
+;CHECK:          ldr {{r[0-9]+}}, [[LABEL2:\.?LCPI[0-9]+_[0-9]]]
 ;CHECK:          [[LABEL2]]:
 ;CHECK-MERGE:    .long .L_MergedGlobals
 ;CHECK-NO-MERGE: .long {{_?y}}
+;CHECK-WIN32:    g1:
+;CHECK-WIN32:    movw [[REG2:r[0-9]+]], :lower16:.L_MergedGlobals
+;CHECK-WIN32:    movt [[REG2]], :upper16:.L_MergedGlobals
   store i32 %a1, i32* @y, align 4
   store i32 %a2, i32* @z, align 4
   ret void
@@ -35,6 +42,7 @@ define void @g1(i32 %a1, i32 %a2) {
 ;CHECK-MERGE:	.type	.L_MergedGlobals,%object
 ;CHECK-MERGE:	.local	.L_MergedGlobals
 ;CHECK-MERGE:	.comm	.L_MergedGlobals,12,4
+;CHECK-WIN32:	.lcomm	.L_MergedGlobals,12,4
 
 ;CHECK-MERGE:	.globl	x
 ;CHECK-MERGE: x = .L_MergedGlobals
@@ -45,3 +53,10 @@ define void @g1(i32 %a1, i32 %a2) {
 ;CHECK-MERGE:	.globl	z
 ;CHECK-MERGE: z = .L_MergedGlobals+8
 ;CHECK-MERGE: .size z, 4
+
+;CHECK-WIN32:	.globl	x
+;CHECK-WIN32: x = .L_MergedGlobals
+;CHECK-WIN32:	.globl	y
+;CHECK-WIN32: y = .L_MergedGlobals+4
+;CHECK-WIN32:	.globl	z
+;CHECK-WIN32: z = .L_MergedGlobals+8
diff --git a/test/CodeGen/ARM/peephole-phi.mir b/test/CodeGen/ARM/peephole-phi.mir
new file mode 100644
index 000000000000..30343654dea1
--- /dev/null
+++ b/test/CodeGen/ARM/peephole-phi.mir
@@ -0,0 +1,67 @@
+# RUN: llc -o - %s -mtriple=armv7-- -verify-machineinstrs -run-pass=peephole-opt | FileCheck %s
+#
+# Make sure we do not crash on this input.
+# Note that this input could in principle be optimized, but right now we don't
+# have this case implemented so the output should simply be unchanged.
+#
+# CHECK-LABEL: name: func
+# CHECK: body: |
+# CHECK:   bb.0:
+# CHECK:     Bcc %bb.2, 1, undef %cpsr
+#
+# CHECK:   bb.1:
+# CHECK:     %0:dpr = IMPLICIT_DEF
+# CHECK:     %1:gpr, %2:gpr = VMOVRRD %0, 14, %noreg
+# CHECK:     B %bb.3
+#
+# CHECK:   bb.2:
+# CHECK:     %3:spr = IMPLICIT_DEF
+# CHECK:     %4:gpr = VMOVRS %3, 14, %noreg
+#
+# CHECK:   bb.3:
+# CHECK:     %5:gpr = PHI %1, %bb.1, %4, %bb.2
+# CHECK:     %6:spr = VMOVSR %5, 14, %noreg
+---
+name: func0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    Bcc %bb.2, 1, undef %cpsr
+
+  bb.1:
+    %0:dpr = IMPLICIT_DEF
+    %1:gpr, %2:gpr = VMOVRRD %0:dpr, 14, %noreg
+    B %bb.3
+
+  bb.2:
+    %3:spr = IMPLICIT_DEF
+    %4:gpr = VMOVRS %3:spr, 14, %noreg
+
+  bb.3:
+    %5:gpr = PHI %1, %bb.1, %4, %bb.2
+    %6:spr = VMOVSR %5, 14, %noreg
+...
+
+# CHECK-LABEL: name: func1
+# CHECK:    %6:spr = PHI %0, %bb.1, %2, %bb.2
+# CHEKC:    %7:spr = COPY %6
+---
+name: func1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    Bcc %bb.2, 1, undef %cpsr
+
+  bb.1:
+    %1:spr = IMPLICIT_DEF
+    %0:gpr = VMOVRS %1, 14, %noreg
+    B %bb.3
+
+  bb.2:
+    %3:spr = IMPLICIT_DEF
+    %2:gpr = VMOVRS %3:spr, 14, %noreg
+
+  bb.3:
+    %4:gpr = PHI %0, %bb.1, %2, %bb.2
+    %5:spr = VMOVSR %4, 14, %noreg
+...
diff --git a/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
new file mode 100644
index 000000000000..093899690d07
--- /dev/null
+++ b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Make sure that a negative value for the compare-and-swap is zero extended
+; from i8/i16 to i32 since it will be compared for equality.
+; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
+
+@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
+@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
+@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
+
+define signext i32 @main() {
+; CHECK-LABEL: main:
+; CHECK:    li 3, -32477
+; CHECK:    lis 12, 0
+; CHECK:    li 6, 234
+; CHECK:    sth 3, 46(1)
+; CHECK:    ori 4, 12, 33059
+; CHECK:    sync
+; CHECK:  .LBB0_1: # %L.entry
+; CHECK:    lharx 3, 0, 5
+; CHECK:    cmpw 4, 3
+; CHECK:    bne 0, .LBB0_3
+; CHECK:    sthcx. 6, 0, 5
+; CHECK:    bne 0, .LBB0_1
+; CHECK:    b .LBB0_4
+; CHECK:  .LBB0_3: # %L.entry
+; CHECK:    sthcx. 3, 0, 5
+; CHECK:  .LBB0_4: # %L.entry
+; CHECK:    cmplwi 3, 33059
+; CHECK:    lwsync
+; CHECK:    lhz 3, 46(1)
+; CHECK:    cmplwi 3, 234
+;
+; CHECK-P7-LABEL: main:
+; CHECK-P7:    lis 4, 0
+; CHECK-P7:    li 7, 0
+; CHECK-P7:    li 3, -32477
+; CHECK-P7:    sth 3, 46(1)
+; CHECK-P7:    li 5, 234
+; CHECK-P7:    ori 4, 4, 33059
+; CHECK-P7:    rlwinm 3, 6, 3, 27, 27
+; CHECK-P7:    ori 7, 7, 65535
+; CHECK-P7:    sync
+; CHECK-P7:    slw 8, 5, 3
+; CHECK-P7:    slw 5, 7, 3
+; CHECK-P7:    slw 9, 4, 3
+; CHECK-P7:    and 7, 8, 5
+; CHECK-P7:    rldicr 4, 6, 0, 61
+; CHECK-P7:    and 8, 9, 5
+; CHECK-P7:  .LBB0_1: # %L.entry
+; CHECK-P7:    lwarx 9, 0, 4
+; CHECK-P7:    and 6, 9, 5
+; CHECK-P7:    cmpw 0, 6, 8
+; CHECK-P7:    bne 0, .LBB0_3
+; CHECK-P7:    andc 9, 9, 5
+; CHECK-P7:    or 9, 9, 7
+; CHECK-P7:    stwcx. 9, 0, 4
+; CHECK-P7:    bne 0, .LBB0_1
+; CHECK-P7:    b .LBB0_4
+; CHECK-P7:  .LBB0_3: # %L.entry
+; CHECK-P7:    stwcx. 9, 0, 4
+; CHECK-P7:  .LBB0_4: # %L.entry
+; CHECK-P7:    srw 3, 6, 3
+; CHECK-P7:    lwsync
+; CHECK-P7:    cmplwi 3, 33059
+; CHECK-P7:    lhz 3, 46(1)
+; CHECK-P7:    cmplwi 3, 234
+L.entry:
+  %value.addr = alloca i16, align 2
+  store i16 -32477, i16* %value.addr, align 2
+  %0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
+  %1 = extractvalue { i16, i1 } %0, 1
+  br i1 %1, label %L.B0000, label %L.B0003
+
+L.B0003:                                          ; preds = %L.entry
+  %puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
+  ret i32 1
+
+L.B0000:                                          ; preds = %L.entry
+  %2 = load i16, i16* %value.addr, align 2
+  %3 = icmp eq i16 %2, 234
+  br i1 %3, label %L.B0001, label %L.B0005
+
+L.B0005:                                          ; preds = %L.B0000
+  %puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
+  ret i32 1
+
+L.B0001:                                          ; preds = %L.B0000
+  %puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #0
diff --git a/test/CodeGen/PowerPC/atomics-regression.ll b/test/CodeGen/PowerPC/atomics-regression.ll
index 7079f6dd52e9..daf55fc426d0 100644
--- a/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/test/CodeGen/PowerPC/atomics-regression.ll
@@ -404,6 +404,7 @@ define void @test39() {
 define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test40:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    b .LBB40_2
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB40_1:
@@ -423,6 +424,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test41:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:  .LBB41_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -444,6 +446,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test42:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:  .LBB42_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -465,6 +468,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test43:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB43_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -485,6 +489,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test44:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB44_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -505,6 +510,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test45:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB45_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -527,6 +533,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test46:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB46_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -549,6 +556,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test47:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB47_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -571,6 +579,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test48:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB48_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -593,6 +602,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test49:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB49_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -615,6 +625,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test50:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    b .LBB50_2
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB50_1:
@@ -634,6 +645,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test51:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:  .LBB51_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -655,6 +667,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test52:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:  .LBB52_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -676,6 +689,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test53:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB53_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -696,6 +710,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test54:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB54_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -716,6 +731,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test55:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB55_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -738,6 +754,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test56:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB56_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -760,6 +777,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test57:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB57_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -782,6 +800,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test58:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB58_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -804,6 +823,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test59:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB59_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -1248,6 +1268,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) {
 define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test80:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    b .LBB80_2
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB80_1:
@@ -1267,6 +1288,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test81:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:  .LBB81_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -1288,6 +1310,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test82:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:  .LBB82_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -1309,6 +1332,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test83:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB83_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -1329,6 +1353,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test84:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB84_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -1349,6 +1374,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test85:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB85_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -1371,6 +1397,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test86:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB86_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -1393,6 +1420,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test87:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB87_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -1415,6 +1443,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test88:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB88_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -1437,6 +1466,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test89:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB89_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
@@ -1459,6 +1489,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
 define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test90:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    b .LBB90_2
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB90_1:
@@ -1478,6 +1509,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test91:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:  .LBB91_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -1499,6 +1531,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test92:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:  .LBB92_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
@@ -1520,6 +1553,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test93:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB93_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -1540,6 +1574,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test94:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    b .LBB94_2
 ; PPC64LE-NEXT:    .p2align 5
@@ -1560,6 +1595,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test95:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB95_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -1582,6 +1618,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test96:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:  .LBB96_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -1604,6 +1641,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test97:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB97_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -1626,6 +1664,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test98:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB98_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
@@ -1648,6 +1687,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
 define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test99:
 ; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:  .LBB99_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 333efb04913d..1a483355319f 100644
--- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -4780,3 +4780,42 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub
   ret <2 x double> %res
 }
 
+; PR35977
+define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
+; CHECK-LABEL: test_zext_v8i8_to_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, (%rsi)
+; CHECK-NEXT:    retq
+  %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
+  %tmp2 = load <8 x i8>, <8 x i8>* %tmp
+  %tmp3 = extractelement <8 x i8> %tmp2, i32 0
+  %tmp4 = zext i8 %tmp3 to i16
+  %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
+  %tmp6 = extractelement <8 x i8> %tmp2, i32 1
+  %tmp7 = zext i8 %tmp6 to i16
+  %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
+  %tmp9 = extractelement <8 x i8> %tmp2, i32 2
+  %tmp10 = zext i8 %tmp9 to i16
+  %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
+  %tmp12 = extractelement <8 x i8> %tmp2, i32 3
+  %tmp13 = zext i8 %tmp12 to i16
+  %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
+  %tmp15 = extractelement <8 x i8> %tmp2, i32 4
+  %tmp16 = zext i8 %tmp15 to i16
+  %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
+  %tmp18 = extractelement <8 x i8> %tmp2, i32 5
+  %tmp19 = zext i8 %tmp18 to i16
+  %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
+  %tmp21 = extractelement <8 x i8> %tmp2, i32 6
+  %tmp22 = zext i8 %tmp21 to i16
+  %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
+  %tmp24 = extractelement <8 x i8> %tmp2, i32 7
+  %tmp25 = zext i8 %tmp24 to i16
+  %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
+  %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
+  store <8 x i16> %tmp27, <8 x i16>* %tmp28
+  ret void
+}
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index 410d67ff0ec1..3d03ec677a01 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck -check-prefixes=CHECK,NOBZERO %s
+; RUN: llc < %s -mtriple=x86_64-apple-ios10.0-simulator | FileCheck -check-prefixes=CHECK,NOBZERO %s
 
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 ; CHECK-LABEL: foo:
-; CHECK: {{calll|callq}} ___bzero
+; BZERO: {{calll|callq}} ___bzero
+; NOBZERO-NOT: bzero
 define void @foo(i8* %p, i32 %len) {
   call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
   ret void
diff --git a/test/CodeGen/X86/inline-asm-A-constraint.ll b/test/CodeGen/X86/inline-asm-A-constraint.ll
index 7975b318eff5..2ad011e88e0d 100644
--- a/test/CodeGen/X86/inline-asm-A-constraint.ll
+++ b/test/CodeGen/X86/inline-asm-A-constraint.ll
@@ -19,7 +19,8 @@ entry:
   %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
   ret { i64, i64 } %.fca.1.insert
 }
-; CHECK: lock cmpxchg16b
+; CHECK: lock
+; CHECK-NEXT: cmpxchg16b
 
 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/test/CodeGen/X86/pr35761.ll b/test/CodeGen/X86/pr35761.ll
new file mode 100644
index 000000000000..0bf81bff841f
--- /dev/null
+++ b/test/CodeGen/X86/pr35761.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux %s -o - | FileCheck %s
+
+@x = global i8 0, align 1
+@y = global i32 0, align 4
+@z = global i24 0, align 4
+
+define void @PR35761(i32 %call) {
+; CHECK-LABEL: PR35761:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzbl {{.*}}(%rip), %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    movzbl {{.*}}(%rip), %ecx
+; CHECK-NEXT:    xorl $255, %ecx
+; CHECK-NEXT:    orl %eax, %ecx
+; CHECK-NEXT:    movw %cx, {{.*}}(%rip)
+; CHECK-NEXT:    movb $0, z+{{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i8, i8* @x, align 1
+  %tobool = trunc i8 %0 to i1
+  %conv = zext i1 %tobool to i32
+  %or = or i32 32767, %call
+  %neg = xor i32 %or, -1
+  %neg1 = xor i32 %neg, -1
+  %1 = load i32, i32* @y, align 4
+  %xor = xor i32 %neg1, %1
+  %or2 = or i32 %conv, %xor
+  %conv3 = trunc i32 %or2 to i8
+  %bf.load = load i24, i24* @z, align 4
+  %2 = zext i8 %conv3 to i24
+  %bf.value = and i24 %2, 4194303
+  store i24 %bf.value, i24* @z, align 2
+  ret void
+}
+
diff --git a/test/CodeGen/X86/pr35972.ll b/test/CodeGen/X86/pr35972.ll
new file mode 100644
index 000000000000..09363fbc89bb
--- /dev/null
+++ b/test/CodeGen/X86/pr35972.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - -mattr=avx512bw | FileCheck %s
+
+define void @test3(i32 %c, <64 x i1>* %ptr) {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    sbbl %ecx, %ecx
+; CHECK-NEXT:    kmovd %ecx, %k0
+; CHECK-NEXT:    kunpckdq %k0, %k0, %k0
+; CHECK-NEXT:    kmovq %k0, (%eax)
+; CHECK-NEXT:    retl
+  %cmp = icmp eq i32 %c, 0
+  %insert = insertelement <64 x i1> undef, i1 %cmp, i32 0
+  %shuf = shufflevector <64 x i1> %insert, <64 x i1> undef, <64 x i32> zeroinitializer
+  store <64 x i1> %shuf, <64 x i1>* %ptr
+  ret void
+}
+
diff --git a/test/CodeGen/X86/pr37563.ll b/test/CodeGen/X86/pr37563.ll
new file mode 100644
index 000000000000..934902d8e0d0
--- /dev/null
+++ b/test/CodeGen/X86/pr37563.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+
+%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }>
+
+@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8
+@tf_3_var_136 = global i64 0, align 8
+@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1
+
+define void @PR35763() {
+; CHECK-LABEL: PR35763:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax
+; CHECK-NEXT:    movzwl z+{{.*}}(%rip), %ecx
+; CHECK-NEXT:    orl %eax, %ecx
+; CHECK-NEXT:    movq %rcx, {{.*}}(%rip)
+; CHECK-NEXT:    movl z+{{.*}}(%rip), %eax
+; CHECK-NEXT:    movzbl z+{{.*}}(%rip), %ecx
+; CHECK-NEXT:    shlq $32, %rcx
+; CHECK-NEXT:    orq %rax, %rcx
+; CHECK-NEXT:    movabsq $1090921758719, %rax # imm = 0xFE0000FFFF
+; CHECK-NEXT:    andq %rcx, %rax
+; CHECK-NEXT:    movl %eax, z+{{.*}}(%rip)
+; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movb %al, z+{{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8
+  %conv = sext i16 %0 to i32
+  %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2
+  %bf.clear = and i32 %bf.load, 2097151
+  %bf.cast = zext i32 %bf.clear to i64
+  %conv1 = trunc i64 %bf.cast to i32
+  %or = or i32 %conv, %conv1
+  %conv2 = trunc i32 %or to i16
+  %conv3 = zext i16 %conv2 to i64
+  store i64 %conv3, i64* @tf_3_var_136, align 8
+  %bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
+  %bf.clear5 = and i40 %bf.load4, -8589869057
+  store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
+  ret void
+}
diff --git a/test/CodeGen/X86/var-permute-128.ll b/test/CodeGen/X86/var-permute-128.ll
index fb5f02e8d5d2..ba78cf7ee180 100644
--- a/test/CodeGen/X86/var-permute-128.ll
+++ b/test/CodeGen/X86/var-permute-128.ll
@@ -207,13 +207,12 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
 define <16 x i8> @var_shuffle_v16i8(<16 x i8> %v, <16 x i8> %indices) nounwind {
 ; SSSE3-LABEL: var_shuffle_v16i8:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pshufb %xmm0, %xmm1
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    pshufb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; AVX-LABEL: var_shuffle_v16i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufb %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %index0 = extractelement <16 x i8> %indices, i32 0
   %index1 = extractelement <16 x i8> %indices, i32 1
diff --git a/test/CodeGen/X86/var-permute-256.ll b/test/CodeGen/X86/var-permute-256.ll
index 3baab2476d40..b624fb087193 100644
--- a/test/CodeGen/X86/var-permute-256.ll
+++ b/test/CodeGen/X86/var-permute-256.ll
@@ -1277,3 +1277,183 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
   %ret7 = insertelement <8 x float> %ret6, float %v7, i32 7
   ret <8 x float> %ret7
 }
+
+define <8 x i32> @pr35820(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind {
+; AVX1-LABEL: pr35820:
+; AVX1:       # %bb.0: # %entry
+; AVX1-NEXT:    vpextrq $1, %xmm1, %r8
+; AVX1-NEXT:    movq %r8, %r10
+; AVX1-NEXT:    shrq $30, %r10
+; AVX1-NEXT:    vmovq %xmm1, %r9
+; AVX1-NEXT:    movq %r9, %rsi
+; AVX1-NEXT:    shrq $30, %rsi
+; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    andl $3, %r9d
+; AVX1-NEXT:    andl $12, %esi
+; AVX1-NEXT:    andl $3, %r8d
+; AVX1-NEXT:    andl $12, %r10d
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    movq %rax, %rdi
+; AVX1-NEXT:    shrq $30, %rdi
+; AVX1-NEXT:    vmovq %xmm0, %rcx
+; AVX1-NEXT:    movq %rcx, %rdx
+; AVX1-NEXT:    shrq $30, %rdx
+; AVX1-NEXT:    andl $3, %ecx
+; AVX1-NEXT:    andl $12, %edx
+; AVX1-NEXT:    andl $3, %eax
+; AVX1-NEXT:    andl $12, %edi
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpinsrd $1, -24(%rsp,%rdx), %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrd $2, -24(%rsp,%rax,4), %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrd $3, -24(%rsp,%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpinsrd $1, -24(%rsp,%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrd $2, -24(%rsp,%r8,4), %xmm1, %xmm1
+; AVX1-NEXT:    vpinsrd $3, -24(%rsp,%r10), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; INT256-LABEL: pr35820:
+; INT256:       # %bb.0: # %entry
+; INT256-NEXT:    # kill: def %xmm0 killed %xmm0 def %ymm0
+; INT256-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; INT256-NEXT:    retq
+entry:
+  %tmp1 = extractelement <8 x i32> %indices, i32 0
+  %vecext2.8 = extractelement <4 x i32> %v, i32 %tmp1
+  %tmp2 = extractelement <8 x i32> %indices, i32 1
+  %vecext2.9 = extractelement <4 x i32> %v, i32 %tmp2
+  %tmp3 = extractelement <8 x i32> %indices, i32 2
+  %vecext2.10 = extractelement <4 x i32> %v, i32 %tmp3
+  %tmp4 = extractelement <8 x i32> %indices, i32 3
+  %vecext2.11 = extractelement <4 x i32> %v, i32 %tmp4
+  %tmp5 = extractelement <8 x i32> %indices, i32 4
+  %vecext2.12 = extractelement <4 x i32> %v, i32 %tmp5
+  %tmp6 = extractelement <8 x i32> %indices, i32 5
+  %vecext2.13 = extractelement <4 x i32> %v, i32 %tmp6
+  %tmp7 = extractelement <8 x i32> %indices, i32 6
+  %vecext2.14 = extractelement <4 x i32> %v, i32 %tmp7
+  %tmp8 = extractelement <8 x i32> %indices, i32 7
+  %vecext2.15 = extractelement <4 x i32> %v, i32 %tmp8
+  %tmp9 = insertelement <8 x i32> undef, i32 %vecext2.8, i32 0
+  %tmp10 = insertelement <8 x i32> %tmp9, i32 %vecext2.9, i32 1
+  %tmp11 = insertelement <8 x i32> %tmp10, i32 %vecext2.10, i32 2
+  %tmp12 = insertelement <8 x i32> %tmp11, i32 %vecext2.11, i32 3
+  %tmp13 = insertelement <8 x i32> %tmp12, i32 %vecext2.12, i32 4
+  %tmp14 = insertelement <8 x i32> %tmp13, i32 %vecext2.13, i32 5
+  %tmp15 = insertelement <8 x i32> %tmp14, i32 %vecext2.14, i32 6
+  %tmp16 = insertelement <8 x i32> %tmp15, i32 %vecext2.15, i32 7
+  ret <8 x i32> %tmp16
+}
+
+define <8 x float> @pr35820_float(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind {
+; AVX1-LABEL: pr35820_float:
+; AVX1:       # %bb.0: # %entry
+; AVX1-NEXT:    vpextrq $1, %xmm1, %r8
+; AVX1-NEXT:    movq %r8, %r10
+; AVX1-NEXT:    shrq $30, %r10
+; AVX1-NEXT:    vmovq %xmm1, %r9
+; AVX1-NEXT:    movq %r9, %rdx
+; AVX1-NEXT:    shrq $30, %rdx
+; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    andl $3, %r9d
+; AVX1-NEXT:    andl $12, %edx
+; AVX1-NEXT:    andl $3, %r8d
+; AVX1-NEXT:    andl $12, %r10d
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    movq %rax, %rdi
+; AVX1-NEXT:    shrq $30, %rdi
+; AVX1-NEXT:    vmovq %xmm0, %rcx
+; AVX1-NEXT:    movq %rcx, %rsi
+; AVX1-NEXT:    shrq $30, %rsi
+; AVX1-NEXT:    andl $3, %ecx
+; AVX1-NEXT:    andl $12, %esi
+; AVX1-NEXT:    andl $3, %eax
+; AVX1-NEXT:    andl $12, %edi
+; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
+; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; INT256-LABEL: pr35820_float:
+; INT256:       # %bb.0: # %entry
+; INT256-NEXT:    # kill: def %xmm0 killed %xmm0 def %ymm0
+; INT256-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; INT256-NEXT:    retq
+entry:
+  %tmp1 = extractelement <8 x i32> %indices, i32 0
+  %vecext2.8 = extractelement <4 x float> %v, i32 %tmp1
+  %tmp2 = extractelement <8 x i32> %indices, i32 1
+  %vecext2.9 = extractelement <4 x float> %v, i32 %tmp2
+  %tmp3 = extractelement <8 x i32> %indices, i32 2
+  %vecext2.10 = extractelement <4 x float> %v, i32 %tmp3
+  %tmp4 = extractelement <8 x i32> %indices, i32 3
+  %vecext2.11 = extractelement <4 x float> %v, i32 %tmp4
+  %tmp5 = extractelement <8 x i32> %indices, i32 4
+  %vecext2.12 = extractelement <4 x float> %v, i32 %tmp5
+  %tmp6 = extractelement <8 x i32> %indices, i32 5
+  %vecext2.13 = extractelement <4 x float> %v, i32 %tmp6
+  %tmp7 = extractelement <8 x i32> %indices, i32 6
+  %vecext2.14 = extractelement <4 x float> %v, i32 %tmp7
+  %tmp8 = extractelement <8 x i32> %indices, i32 7
+  %vecext2.15 = extractelement <4 x float> %v, i32 %tmp8
+  %tmp9 = insertelement <8 x float> undef, float %vecext2.8, i32 0
+  %tmp10 = insertelement <8 x float> %tmp9, float %vecext2.9, i32 1
+  %tmp11 = insertelement <8 x float> %tmp10, float %vecext2.10, i32 2
+  %tmp12 = insertelement <8 x float> %tmp11, float %vecext2.11, i32 3
+  %tmp13 = insertelement <8 x float> %tmp12, float %vecext2.12, i32 4
+  %tmp14 = insertelement <8 x float> %tmp13, float %vecext2.13, i32 5
+  %tmp15 = insertelement <8 x float> %tmp14, float %vecext2.14, i32 6
+  %tmp16 = insertelement <8 x float> %tmp15, float %vecext2.15, i32 7
+  ret <8 x float> %tmp16
+}
+
+define <4 x i32> @big_source(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
+; AVX-LABEL: big_source:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    pushq %rbp
+; AVX-NEXT:    movq %rsp, %rbp
+; AVX-NEXT:    andq $-32, %rsp
+; AVX-NEXT:    subq $64, %rsp
+; AVX-NEXT:    vmovq %xmm1, %rax
+; AVX-NEXT:    movq %rax, %rcx
+; AVX-NEXT:    shrq $30, %rcx
+; AVX-NEXT:    andl $28, %ecx
+; AVX-NEXT:    vpextrq $1, %xmm1, %rdx
+; AVX-NEXT:    movq %rdx, %rsi
+; AVX-NEXT:    sarq $32, %rsi
+; AVX-NEXT:    andl $7, %eax
+; AVX-NEXT:    andl $7, %edx
+; AVX-NEXT:    vmovaps %ymm0, (%rsp)
+; AVX-NEXT:    andl $7, %esi
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vpinsrd $1, (%rsp,%rcx), %xmm0, %xmm0
+; AVX-NEXT:    vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0
+; AVX-NEXT:    vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0
+; AVX-NEXT:    movq %rbp, %rsp
+; AVX-NEXT:    popq %rbp
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+entry:
+  %tmp1 = extractelement <4 x i32> %indices, i32 0
+  %vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1
+  %tmp2 = extractelement <4 x i32> %indices, i32 1
+  %vecext2.9 = extractelement <8 x i32> %v, i32 %tmp2
+  %tmp3 = extractelement <4 x i32> %indices, i32 2
+  %vecext2.10 = extractelement <8 x i32> %v, i32 %tmp3
+  %tmp4 = extractelement <4 x i32> %indices, i32 3
+  %vecext2.11 = extractelement <8 x i32> %v, i32 %tmp4
+  %tmp9 = insertelement <4 x i32> undef, i32 %vecext2.8, i32 0
+  %tmp10 = insertelement <4 x i32> %tmp9, i32 %vecext2.9, i32 1
+  %tmp11 = insertelement <4 x i32> %tmp10, i32 %vecext2.10, i32 2
+  %tmp12 = insertelement <4 x i32> %tmp11, i32 %vecext2.11, i32 3
+  ret <4 x i32> %tmp12
+}
diff --git a/test/MC/COFF/cv-inline-linetable.s b/test/MC/COFF/cv-inline-linetable.s
index 61a42d92f405..c5e28c4d0785 100644
--- a/test/MC/COFF/cv-inline-linetable.s
+++ b/test/MC/COFF/cv-inline-linetable.s
@@ -135,3 +135,29 @@ Ltmp1:
 	.cv_filechecksums               # File index to string table offset subsection
 	.cv_stringtable                 # String table
 
+# CHECK-LABEL:  FunctionLineTable [
+# CHECK:    LinkageName: ?baz@@YAXXZ
+# CHECK:    Flags: 0x1
+# CHECK:    CodeSize: 0x3D
+# CHECK:    FilenameSegment [
+# CHECK:      Filename: D:\src\llvm\build\t.cpp (0x0)
+# CHECK:      +0x0 [
+# CHECK:        LineNumberStart: 13
+# CHECK:      ]
+# CHECK:      +0x1 [
+# CHECK:        LineNumberStart: 14
+# CHECK:      ]
+# CHECK:      +0x8 [
+# CHECK:        LineNumberStart: 15
+# CHECK:      ]
+#	There shouldn't be any other line number entries because all the other
+#	.cv_locs are on line 15 where the top-level inline call site is.
+# CHECK-NOT: LineNumberStart
+# CHECK:      +0x34 [
+# CHECK:        LineNumberStart: 16
+# CHECK:      ]
+# CHECK:      +0x3B [
+# CHECK:        LineNumberStart: 17
+# CHECK:      ]
+# CHECK:    ]
+# CHECK:  ]
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 23846d921a8c..378af768fa99 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -99,7 +99,8 @@
 // CHECK: shll $2, %eax
         sall $2, %eax
 
-// CHECK: rep movsb
+// CHECK: rep
+// CHECK-NEXT: movsb
 rep     # comment
 movsb
 
@@ -1557,3 +1558,38 @@ ptwriteq 0xdeadbeef(%rbx,%rcx,8)
 // CHECK: ptwriteq %rax
 // CHECK:  encoding: [0xf3,0x48,0x0f,0xae,0xe0]
 ptwriteq %rax
+
+//  __asm __volatile(
+//    "pushf        \n\t"
+//    "popf       \n\t"
+//    "rep        \n\t"
+//    ".byte  0x0f, 0xa7, 0xd0"
+//  );
+// CHECK: pushfq
+// CHECK-NEXT: popfq
+// CHECK-NEXT: rep
+// CHECK-NEXT: .byte 15
+// CHECK-NEXT: .byte 167
+// CHECK-NEXT: .byte 208
+pushfq
+popfq
+rep
+.byte 15
+.byte 167
+.byte 208
+
+// CHECK: lock
+// CHECK: cmpxchgl
+        cmp $0, %edx
+        je 1f
+        lock
+1:      cmpxchgl %ecx,(%rdi)
+
+// CHECK: rep
+// CHECK-NEXT: byte
+rep
+.byte 0xa4      # movsb
+
+// CHECK: lock
+// This line has to be the last one in the file
+lock
diff --git a/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll b/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll
new file mode 100644
index 000000000000..9a9ee7223c90
--- /dev/null
+++ b/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll
@@ -0,0 +1,46 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-scei-ps4"
+
+%struct.CFVS = type { %struct.Vec }
+%struct.Vec = type { i8 }
+%struct.S = type { i8 }
+
+define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 {
+entry:
+  %this.addr = alloca %struct.CFVS*, align 8
+  store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8
+  %this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8
+  %m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0
+  ret void
+}
+
+declare dereferenceable(1) %struct.S* @_Z3Getv()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "bz188598-b.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2)
+!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS")
+!10 = !DIFile(filename: "./bz188598.h", directory: "")
+!11 = !{!12, !27}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !9, file: !10, line: 9, baseType: !13, size: 8)
+!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !10, line: 4, size: 8, elements: !14, templateParams: !19, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
+!14 = !{!35}
+!19 = !{!20}
+!20 = !DITemplateValueParameter(name: "F", type: !21, value: %struct.S* ()* @_Z3Getv)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
+!22 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !10, line: 2, baseType: !23)
+!23 = !DISubroutineType(types: !24)
+!24 = !{!35}
+!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null, !30}
+!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!35 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
diff --git a/test/ThinLTO/X86/dicompositetype-unique2.ll b/test/ThinLTO/X86/dicompositetype-unique2.ll
new file mode 100644
index 000000000000..924579569270
--- /dev/null
+++ b/test/ThinLTO/X86/dicompositetype-unique2.ll
@@ -0,0 +1,69 @@
+; RUN: opt -module-summary -o %t1.bc %s
+; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique2.ll
+; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3.
+; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
+; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \
+; RUN: -r %t1.bc,_ZN1CD2Ev,pl \
+; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \
+; RUN: -r %t1.bc,_Z3Getv,l \
+; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \
+; RUN: -r %t2.bc,_Z3Getv,l
+; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s
+
+; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}}
+; CHECK: define available_externally{{.*}} void @_ZN4CFVSD2Ev
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-scei-ps4"
+
+%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }>
+%class.A = type { %struct.Vec }
+%struct.Vec = type { i8 }
+%struct.CFVS = type { %struct.Vec }
+%struct.S = type { i8 }
+
+define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 !dbg !8 {
+entry:
+  %this.addr = alloca %class.C*, align 8
+  %this1 = load %class.C*, %class.C** %this.addr, align 8
+  %m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2
+  call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50
+  ret void
+}
+
+declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr
+
+declare dereferenceable(1) %struct.S* @_Z3Getv()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !9, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !46, variables: !2)
+!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !1, line: 5, size: 128, elements: !10, vtableHolder: !9, identifier: "_ZTS1C")
+!10 = !{!38, !46}
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !16, line: 4, size: 8, elements: !17, templateParams: !22, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
+!16 = !DIFile(filename: "./bz188598.h", directory: ".")
+!17 = !{!55}
+!22 = !{!23}
+!23 = !DITemplateValueParameter(name: "F", type: !24, value: %struct.S* ()* @_Z3Getv)
+!24 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64)
+!25 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !16, line: 2, baseType: !26)
+!26 = !DISubroutineType(types: !27)
+!27 = !{!55}
+!38 = !DIDerivedType(tag: DW_TAG_member, name: "m", scope: !9, file: !1, line: 7, baseType: !39, size: 8, offset: 72)
+!39 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !16, line: 7, size: 8, elements: !40, identifier: "_ZTS4CFVS")
+!40 = !{!41}
+!41 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !39, file: !16, line: 9, baseType: !15, size: 8)
+!46 = !DISubprogram(name: "~C", scope: !9, file: !1, line: 6, type: !47, isLocal: false, isDefinition: false, scopeLine: 6, containingType: !9, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 0, flags: DIFlagPrototyped, isOptimized: false)
+!47 = !DISubroutineType(types: !48)
+!48 = !{!55}
+!50 = !DILocation(line: 9, scope: !51)
+!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9)
+!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
new file mode 100644
index 000000000000..b153a8b1e53f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true  %s | FileCheck %s --check-prefix=CHECK
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Select when both offset and scale reg are present.
+define i64 @test1(i1 %c, i64* %b, i64 %scale) {
+; CHECK-LABEL: @test1
+entry:
+; CHECK-LABEL: entry:
+  %g = getelementptr inbounds i64, i64* %b, i64 %scale
+  %g1 = getelementptr inbounds i64, i64* %g, i64 8
+  %g2 = getelementptr inbounds i64, i64* %g, i64 16
+  %s = select i1 %c, i64* %g1, i64* %g2
+; CHECK-NOT: sunkaddr
+  %v = load i64 , i64* %s, align 8
+  ret i64 %v
+}
+
diff --git a/test/Transforms/GVNHoist/pr35222-hoist-load.ll b/test/Transforms/GVNHoist/pr35222-hoist-load.ll
index 7e9c62006162..b9b1a870a59b 100644
--- a/test/Transforms/GVNHoist/pr35222-hoist-load.ll
+++ b/test/Transforms/GVNHoist/pr35222-hoist-load.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+; CHECK-LABEL: build_tree
 ; CHECK: load
 ; CHECK: load
 ; Check that the load is not hoisted because the call can potentially
@@ -23,3 +24,47 @@ do.end:                                           ; preds = %do.body
 }
 
 declare i1 @pqdownheap(i32)
+
+@i = external hidden unnamed_addr global i32, align 4
+@j = external hidden unnamed_addr global [573 x i32], align 4
+@v = external global i1
+
+; CHECK-LABEL: test
+; CHECK-LABEL: do.end
+; CHECK: load
+; Check that the load is not hoisted because the call can potentially
+; modify the global
+
+define i32 @test() {
+entry:
+  br label %for.cond
+
+for.cond:
+  %a3 = load volatile i1, i1* @v
+  br i1 %a3, label %for.body, label %while.end
+
+for.body:
+  br label %if.then
+
+if.then:
+  %tmp4 = load i32, i32* @i, align 4
+  br label %for.cond
+
+while.end:
+  br label %do.body
+
+do.body:
+  %tmp9 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j,
+i32 0, i32 1), align 4
+  %tmp10 = load i32, i32* @i, align 4
+  call void @fn()
+  %a1 = load volatile i1, i1* @v
+  br i1 %a1, label %do.body, label %do.end
+
+do.end:
+  %tmp20 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j,
+i32 0, i32 1), align 4
+  ret i32 %tmp20
+}
+
+declare void @fn()
diff --git a/test/Transforms/JumpThreading/ddt-crash3.ll b/test/Transforms/JumpThreading/ddt-crash3.ll
new file mode 100644
index 000000000000..50ac86a3fb5b
--- /dev/null
+++ b/test/Transforms/JumpThreading/ddt-crash3.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external local_unnamed_addr global i64, align 8
+@global.1 = external local_unnamed_addr global i64, align 8
+@global.2 = external local_unnamed_addr global i64, align 8
+
+; Function Attrs: norecurse noreturn nounwind uwtable
+define void @hoge() local_unnamed_addr #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb26, %bb
+  %tmp = load i64, i64* @global, align 8, !tbaa !1
+  %tmp2 = icmp eq i64 %tmp, 0
+  br i1 %tmp2, label %bb27, label %bb3
+
+bb3:                                              ; preds = %bb1
+  %tmp4 = load i64, i64* @global.1, align 8, !tbaa !1
+  %tmp5 = icmp eq i64 %tmp4, 0
+  br i1 %tmp5, label %bb23, label %bb23
+
+bb23:                                             ; preds = %bb3, %bb3
+  br label %bb26
+
+bb26:                                             ; preds = %bb27, %bb23
+  br label %bb1
+
+bb27:                                             ; preds = %bb1
+  br label %bb26
+}
+
+attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 7.0.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/JumpThreading/ddt-crash4.ll b/test/Transforms/JumpThreading/ddt-crash4.ll
new file mode 100644
index 000000000000..9bf08395d660
--- /dev/null
+++ b/test/Transforms/JumpThreading/ddt-crash4.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
+@global = external global i64, align 8
+
+define void @f() {
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = load i64, i64* @global, align 8
+  %tmp2 = icmp eq i64 %tmp, 0
+  br i1 %tmp2, label %bb27, label %bb3
+
+bb3:
+  %tmp4 = load i64, i64* @global, align 8
+  %tmp5 = icmp eq i64 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:
+  br label %bb7
+
+bb7:
+  %tmp8 = phi i1 [ true, %bb3 ], [ undef, %bb6 ]
+  %tmp9 = select i1 %tmp8, i64 %tmp4, i64 0
+  br i1 false, label %bb10, label %bb23
+
+bb10:
+  %tmp11 = load i64, i64* @global, align 8
+  %tmp12 = icmp slt i64 %tmp11, 5
+  br i1 %tmp12, label %bb13, label %bb17
+
+bb13:
+  br label %bb14
+
+bb14:
+  br i1 undef, label %bb15, label %bb16
+
+bb15:
+  unreachable
+
+bb16:
+  br label %bb10
+
+bb17:
+  br label %bb18
+
+bb18:
+  br i1 undef, label %bb22, label %bb13
+
+bb19:
+  br i1 undef, label %bb20, label %bb21
+
+bb20:
+  unreachable
+
+bb21:
+  br label %bb18
+
+bb22:
+  br label %bb23
+
+bb23:
+  br i1 undef, label %bb24, label %bb13
+
+bb24:
+  br i1 undef, label %bb26, label %bb25
+
+bb25:
+  br label %bb19
+
+bb26:
+  br label %bb1
+
+bb27:
+  br label %bb24
+}
diff --git a/test/Transforms/LoopVectorize/pr35773.ll b/test/Transforms/LoopVectorize/pr35773.ll
new file mode 100644
index 000000000000..362ece70b898
--- /dev/null
+++ b/test/Transforms/LoopVectorize/pr35773.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+@a = common local_unnamed_addr global i32 0, align 4
+@b = common local_unnamed_addr global i8 0, align 1
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit1() local_unnamed_addr{
+entry:
+  br label %for.body
+
+for.body:
+  %main.iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+
+  %i8.iv = phi i8 [ 0, %entry ], [ %i8.add, %for.body ]
+  %i32.iv = phi i32 [ 0, %entry ], [ %i32.add, %for.body ]
+
+  %trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8
+  %i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv
+
+  %noop.conv.under.pse = and i32 %i32.iv, 255
+  %i32.add = add nuw nsw i32 %noop.conv.under.pse, 9
+
+  %inc = add i32 %main.iv, 1
+  %tobool = icmp eq i32 %inc, 16
+  br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body
+
+; CHECK-LABEL: @doit1(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
+
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
+
+; CHECK-NEXT:    [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
+
+; CHECK-NEXT:    [[MAIN_IV_NEXT]] = add i32 [[MAIN_IV]], 4
+; CHECK-NEXT:    [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
+; CHECK-NEXT:    [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+
+for.cond.for.end_crit_edge:
+  store i8 %i8.add, i8* @b, align 1
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
new file mode 100644
index 000000000000..a573fc911eef
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+define void @mainTest(i32* %ptr) #0  {
+; CHECK-LABEL: @mainTest(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA5:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 1, undef
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], undef
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], undef
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], undef
+; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], undef
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP13]], undef
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
+; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
+; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
+; CHECK-NEXT:    [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP15]], undef
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       bail_out:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp eq i32* %ptr, null
+  br i1 %cmp, label %loop, label %bail_out
+
+loop:
+  %dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
+  %0 = load i32, i32 * %ptr , align 4
+  %1 = mul i32 %0, %0
+  %2 = add i32 1, %1
+  %3 = getelementptr inbounds i32, i32 * %ptr, i64 1
+  %4 = load i32, i32 * %3 , align 4
+  %5 = mul i32 %4, %4
+  %6 = add i32 %2, %4
+  %7 = add i32 %6, %5
+  %8 = getelementptr inbounds i32, i32 *%ptr, i64 2
+  %9 = load i32, i32 * %8 , align 4
+  %10 = mul i32 %9, %9
+  %11 = add i32 %7, %9
+  %12 = add i32 %11, %10
+  %13 = sext i32 %9 to i64
+  %14 = getelementptr inbounds i32, i32 *%ptr, i64 3
+  %15 = load i32, i32 * %14 , align 4
+  %16 = mul i32 %15, %15
+  %17 = add i32 %12, %15
+  %18 = add i32 %17, %16
+  br label %loop
+
+bail_out:
+  ret void
+}
+
+attributes #0 = { "target-cpu"="westmere" }
+
diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
new file mode 100644
index 000000000000..52a6d73db981
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+define void @test() #0 {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[DUMMY_ADD:%.*]] = add i16 0, 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i64> <i64 3, i64 2, i64 1, i64 0>, [[TMP4]]
+; CHECK-NEXT:    [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[SUM1:%.*]] = add i64 undef, undef
+; CHECK-NEXT:    [[SUM2:%.*]] = add i64 [[SUM1]], undef
+; CHECK-NEXT:    [[ZSUM:%.*]] = add i64 [[SUM2]], 0
+; CHECK-NEXT:    [[JOIN:%.*]] = add i64 undef, [[ZSUM]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
+; CHECK-NEXT:    [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
+; CHECK-NEXT:    [[LAST:%.*]] = add i64 [[JOIN]], undef
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ]
+  %0 = phi i64 [ 2, %entry ], [ %fork, %loop ]
+  %inc1 = add i64 %0, 1
+  %inc2 = add i64 %0, 2
+  %inc11 = add i64 1, %inc1
+  %exact1 = ashr exact i64 %inc11, 32
+  %inc3 = add i64 %0, 3
+  %dummy_add = add i16 0, 0
+  %inc12 = add i64 1, %inc2
+  %exact2 = ashr exact i64 %inc12, 32
+  %dummy_shl = shl i64 %inc3, 32
+  %inc13 = add i64 1, %inc3
+  %exact3 = ashr exact i64 %inc13, 32
+  %fork = add i64 %0, 0
+  %sum1 = add i64 %exact3, %exact2
+  %sum2 = add i64 %sum1, %exact1
+  %zsum = add i64 %sum2, 0
+  %sext22 = add i64 1, %fork
+  %exact4 = ashr exact i64 %sext22, 32
+  %join = add i64 %fork, %zsum
+  %last = add i64 %join, %exact4
+  br label %loop
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/PR35777.ll b/test/Transforms/SLPVectorizer/X86/PR35777.ll
new file mode 100644
index 000000000000..f3983d716d08
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/PR35777.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
+
+@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
+
+define { i64, i64 } @patatino(double %arg) {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[TMP17]]
+;
+bb:
+  %tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
+  %tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
+  %tmp2 = fmul double %tmp1, %arg
+  %tmp3 = fadd double %tmp, %tmp2
+  %tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
+  %tmp5 = fadd double %tmp4, %tmp3
+  %tmp6 = fptosi double %tmp5 to i32
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
+  %tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
+  %tmp10 = fmul double %tmp9, %arg
+  %tmp11 = fadd double %tmp8, %tmp10
+  %tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
+  %tmp13 = fadd double %tmp12, %tmp11
+  %tmp14 = fptosi double %tmp13 to i32
+  %tmp15 = sext i32 %tmp14 to i64
+  %tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
+  %tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
+  ret { i64, i64 } %tmp17
+}
diff --git a/test/Transforms/SLPVectorizer/X86/PR35865.ll b/test/Transforms/SLPVectorizer/X86/PR35865.ll
new file mode 100644
index 000000000000..b022dd7d9155
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/PR35865.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s
+
+define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
+; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
+; CHECK-NEXT:    [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32
+; CHECK-NEXT:    [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5
+; CHECK-NEXT:    [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32
+; CHECK-NEXT:    [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = extractelement <16 x half> undef, i32 4
+  %conv.i.4.i = fpext half %0 to float
+  %1 = bitcast float %conv.i.4.i to i32
+  %vecins.i.4.i = insertelement <8 x i32> undef, i32 %1, i32 4
+  %2 = extractelement <16 x half> undef, i32 5
+  %conv.i.5.i = fpext half %2 to float
+  %3 = bitcast float %conv.i.5.i to i32
+  %vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 46386e8b63e0..750a44736c97 100644
--- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -7,8 +7,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -20,8 +20,8 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
 ; CHECK-NEXT:    ret <4 x float> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -64,18 +64,18 @@ declare void @llvm.assume(i1) nounwind
 ; This entire tree is ephemeral, don't vectorize any of it.
 define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_eph(
-; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
 ; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -100,18 +100,18 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
 ; CHECK-NEXT:    ret <4 x float> undef
 ;
 ; ZEROTHRESH-LABEL: @simple_select_eph(
-; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
 ; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
 ; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
 ; ZEROTHRESH-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -175,8 +175,8 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
 ; doesn't matter
 define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_insert_out_of_order(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -188,8 +188,8 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
 ; CHECK-NEXT:    ret <4 x float> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
 ; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -233,8 +233,8 @@ declare void @f32_user(float) #0
 ; Multiple users of the final constructed vector
 define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_users(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -247,8 +247,8 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
 ; CHECK-NEXT:    ret <4 x float> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select_users(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -291,18 +291,18 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
 ; Unused insertelement
 define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_no_users(
-; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -330,18 +330,18 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 ; CHECK-NEXT:    ret <4 x float> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select_no_users(
-; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
 ; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
 ; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
 ; ZEROTHRESH-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -387,25 +387,25 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
 ; to do this backwards this backwards
 define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
 ; CHECK-LABEL: @reconstruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
+; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
+; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
+; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
 ; CHECK-NEXT:    ret <4 x i32> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @reconstruct(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
+; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
+; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
+; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
 ; ZEROTHRESH-NEXT:    ret <4 x i32> [[RD]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
@@ -421,8 +421,8 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
 
 define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_v2(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
@@ -430,12 +430,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
 ; CHECK-NEXT:    ret <2 x float> [[RB]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select_v2(
-; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
-; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
-; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <2 x float> %a, i32 0
-; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <2 x float> %a, i32 1
-; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <2 x float> %b, i32 0
-; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <2 x float> %b, i32 1
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1
 ; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
 ; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
 ; ZEROTHRESH-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
@@ -464,12 +464,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
 ; (low cost threshold needed to force this to happen)
 define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
 ; CHECK-LABEL: @simple_select_partial_vector(
-; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
+; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -485,12 +485,12 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
 ; CHECK-NEXT:    ret <4 x float> [[RB]]
 ;
 ; ZEROTHRESH-LABEL: @simple_select_partial_vector(
-; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
+; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
 ; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -530,7 +530,7 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
 ; must be rescheduled. The case here is from compiling Julia.
 define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @reschedule_extract(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -542,7 +542,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    ret <4 x float> [[V3]]
 ;
 ; ZEROTHRESH-LABEL: @reschedule_extract(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -576,7 +576,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
 ; instructions that are erased.
 define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @take_credit(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -588,7 +588,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    ret <4 x float> [[V3]]
 ;
 ; ZEROTHRESH-LABEL: @take_credit(
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -622,10 +622,10 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
 define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
 ; CHECK-LABEL: @multi_tree(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@@ -640,10 +640,10 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
 ;
 ; ZEROTHRESH-LABEL: @multi_tree(
 ; ZEROTHRESH-NEXT:  entry:
-; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
-; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
+; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
 ; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
 ; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
 ; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@@ -675,7 +675,7 @@ entry:
 define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: @_vadd256(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
@@ -696,7 +696,7 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
 ;
 ; ZEROTHRESH-LABEL: @_vadd256(
 ; ZEROTHRESH-NEXT:  entry:
-; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
 ; ZEROTHRESH-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
 ; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/test/Transforms/SLPVectorizer/X86/insertvalue.ll
index 5884ee7a2675..1af11609fe6f 100644
--- a/test/Transforms/SLPVectorizer/X86/insertvalue.ll
+++ b/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -1,11 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
 
-; CHECK-LABEL: julia_2xdouble
-; CHECK: load <2 x double>
-; CHECK: load <2 x double>
-; CHECK: fmul <2 x double>
-; CHECK: fadd <2 x double>
 define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
+; CHECK-LABEL: @julia_2xdouble(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
+; CHECK-NEXT:    [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
+; CHECK-NEXT:    store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
   %x0 = load double, double* %px0, align 4
@@ -29,12 +48,40 @@ top:
   ret void
 }
 
-; CHECK-LABEL: julia_4xfloat
-; CHECK: load <4 x float>
-; CHECK: load <4 x float>
-; CHECK: fmul <4 x float>
-; CHECK: fadd <4 x float>
 define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
+; CHECK-LABEL: @julia_4xfloat(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
+; CHECK-NEXT:    [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
+; CHECK-NEXT:    [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
+; CHECK-NEXT:    [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
+; CHECK-NEXT:    [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
+; CHECK-NEXT:    [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
+; CHECK-NEXT:    [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
+; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
+; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
+; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
+; CHECK-NEXT:    store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
   %x0 = load float, float* %px0, align 4
@@ -76,9 +123,27 @@ top:
   ret void
 }
 
-; CHECK-LABEL: julia_load_array_of_float
-; CHECK: fsub <4 x float>
 define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
+; CHECK-LABEL: @julia_load_array_of_float(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
+; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
+; CHECK-NEXT:    store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %a_arr = load [4 x float], [4 x float]* %a, align 4
   %a0 = extractvalue [4 x float] %a_arr, 0
@@ -102,11 +167,27 @@ top:
   ret void
 }
 
-; CHECK-LABEL: julia_load_array_of_i32
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: sub <4 x i32>
 define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+; CHECK-LABEL: @julia_load_array_of_i32(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
+; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %a_arr = load [4 x i32], [4 x i32]* %a, align 4
   %a0 = extractvalue [4 x i32] %a_arr, 0
@@ -132,9 +213,30 @@ top:
 
 ; Almost identical to previous test, but for type that should NOT be vectorized.
 ;
-; CHECK-LABEL: julia_load_array_of_i16
-; CHECK-NOT: i2>
 define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
+; CHECK-LABEL: @julia_load_array_of_i16(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
+; CHECK-NEXT:    [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
+; CHECK-NEXT:    [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
+; CHECK-NEXT:    [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
+; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
+; CHECK-NEXT:    [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
+; CHECK-NEXT:    [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
+; CHECK-NEXT:    [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
+; CHECK-NEXT:    [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
+; CHECK-NEXT:    [[C1:%.*]] = sub i16 [[A1]], [[B1]]
+; CHECK-NEXT:    [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
+; CHECK-NEXT:    [[C0:%.*]] = sub i16 [[A0]], [[B0]]
+; CHECK-NEXT:    [[C2:%.*]] = sub i16 [[A2]], [[B2]]
+; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
+; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
+; CHECK-NEXT:    [[C3:%.*]] = sub i16 [[A3]], [[B3]]
+; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
+; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
+; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %a_arr = load [4 x i16], [4 x i16]* %a, align 4
   %a0 = extractvalue [4 x i16] %a_arr, 0
@@ -160,11 +262,27 @@ top:
 
 %pseudovec = type { float, float, float, float }
 
-; CHECK-LABEL: julia_load_struct_of_float
-; CHECK: load <4 x float>
-; CHECK: load <4 x float>
-; CHECK: fsub <4 x float>
 define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
+; CHECK-LABEL: @julia_load_struct_of_float(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
+; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
+; CHECK-NEXT:    store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 top:
   %a_struct = load %pseudovec, %pseudovec* %a, align 4
   %a0 = extractvalue %pseudovec %a_struct, 0
diff --git a/test/Transforms/SLPVectorizer/X86/value-bug.ll b/test/Transforms/SLPVectorizer/X86/value-bug.ll
index 64d2ae1c7d79..7558c724a15d 100644
--- a/test/Transforms/SLPVectorizer/X86/value-bug.ll
+++ b/test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -1,15 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-grtev3-linux-gnu"
 
 ; We used to crash on this example because we were building a constant
 ; expression during vectorization and the vectorizer expects instructions
 ; as elements of the vectorized tree.
-; CHECK-LABEL: @test
 ; PR19621
 
 define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  bb279:
+; CHECK-NEXT:    br label [[BB283:%.*]]
+; CHECK:       bb283:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
+; CHECK-NEXT:    br label [[BB284:%.*]]
+; CHECK:       bb284:
+; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    br label [[BB21_I:%.*]]
+; CHECK:       bb21.i:
+; CHECK-NEXT:    br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
+; CHECK:       bb22.i:
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT:    br label [[BB32_I:%.*]]
+; CHECK:       bb32.i:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
+; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
+; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
+; CHECK-NEXT:    [[TMP317:%.*]] = fptrunc double undef to float
+; CHECK-NEXT:    [[TMP319:%.*]] = fptrunc double undef to float
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
+; CHECK-NEXT:    [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
+; CHECK-NEXT:    br label [[BB283]]
+;
 bb279:
   br label %bb283
 
@@ -62,6 +93,12 @@ exit:
 ; vectorizer starts at the type (%t2, %t3) and wil constant fold the tree.
 ; The code that handles insertelement instructions must handle this.
 define <4 x double> @constant_folding() {
+; CHECK-LABEL: @constant_folding(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
+; CHECK-NEXT:    ret <4 x double> [[I2]]
+;
 entry:
   %t0 = fadd double 1.000000e+00 , 0.000000e+00
   %t1 = fadd double 1.000000e+00 , 1.000000e+00
@@ -71,10 +108,3 @@ entry:
   %i2 = insertelement <4 x double> %i1, double %t3, i32 0
   ret <4 x double> %i2
 }
-
-; CHECK-LABEL: @constant_folding
-; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
-; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
-; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
-; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
-; CHECK: ret <4 x double> %[[V3]]
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
new file mode 100644
index 000000000000..e9c54151cf29
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
@@ -0,0 +1,77 @@
+; XFAIL: *
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
+
+; FIXME: Merge into backedge-id-bug
+; Variant which has an issue with region construction
+
+define amdgpu_kernel void @loop_backedge_misidentified_alt(i32 addrspace(1)* %arg0) #0 {
+entry:
+  %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+  %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
+  %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
+  br label %LOOP.HEADER
+
+LOOP.HEADER:
+  %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+  call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+  %tmp12 = zext i32 %i to i64
+  %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
+  %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
+  %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+  %tmp16 = and i32 %tmp15, 65535
+  %tmp17 = icmp eq i32 %tmp16, 1
+  br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+  %tmp19 = extractelement <2 x i32> %tmp, i64 0
+  %tmp22 = lshr i32 %tmp19, 16
+  %tmp24 = urem i32 %tmp22, 52
+  %tmp25 = mul nuw nsw i32 %tmp24, 52
+  br label %INNER_LOOP
+
+INNER_LOOP:
+  %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+  call void asm sideeffect "; inner loop body", ""() #0
+  %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+  %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+  br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+  %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+  call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+  br label %END_ELSE_BLOCK
+
+bb62:
+  %load13 = icmp ult i32 %tmp16, 271
+  ;br i1 %load13, label %bb64, label %INCREMENT_I
+  ; branching directly to the return avoids the bug
+  br i1 %load13, label %RETURN, label %INCREMENT_I
+
+
+bb64:
+  call void asm sideeffect "s_nop 42", "~{memory}"() #0
+  br label %RETURN
+
+INCREMENT_I:
+  %inc.i = add i32 %i, 1
+  call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+  br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+  %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+  call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+  %cmp.end.else.block = icmp eq i32 %i.final, -1
+  br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+  call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+  store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
new file mode 100644
index 000000000000..9cddffdd1795
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s
+
+; StructurizeCFG::orderNodes used an arbitrary and nonsensical sorting
+; function which broke the basic backedge identification algorithm. It
+; would use RPO order, but then do a weird partial sort by the loop
+; depth assuming blocks are sorted by loop. However a block can appear
+; in between blocks of a loop that is not part of a loop, breaking the
+; assumption of the sort.
+;
+; The collectInfos must be done in RPO order. The actual
+; structurization order I think is less important, but unless the loop
+; headers are identified in RPO order, it finds the wrong set of back
+; edges.
+
+define amdgpu_kernel void @loop_backedge_misidentified(i32 addrspace(1)* %arg0) #0 {
+; CHECK-LABEL: @loop_backedge_misidentified(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+; CHECK-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
+; CHECK-NEXT:    [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       LOOP.HEADER:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[FLOW4:%.*]] ]
+; CHECK-NEXT:    call void asm sideeffect "s_nop 0x100b
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
+; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 65535
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TMP17]], true
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
+; CHECK:       Flow2:
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       bb18:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i32 [[TMP19]], 16
+; CHECK-NEXT:    [[TMP24:%.*]] = urem i32 [[TMP22]], 52
+; CHECK-NEXT:    [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
+; CHECK-NEXT:    br label [[INNER_LOOP:%.*]]
+; CHECK:       Flow3:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW4]]
+; CHECK:       INNER_LOOP:
+; CHECK-NEXT:    [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    [[INNER_LOOP_J_INC]] = add nsw i32 [[INNER_LOOP_J]], 1
+; CHECK-NEXT:    [[INNER_LOOP_CMP:%.*]] = icmp eq i32 [[INNER_LOOP_J]], 0
+; CHECK-NEXT:    br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
+; CHECK:       INNER_LOOP_BREAK:
+; CHECK-NEXT:    [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
+; CHECK-NEXT:    call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+; CHECK-NEXT:    br label [[FLOW3:%.*]]
+; CHECK:       bb62:
+; CHECK-NEXT:    [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[LOAD13]], true
+; CHECK-NEXT:    br i1 [[TMP3]], label [[INCREMENT_I:%.*]], label [[FLOW1:%.*]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I]] ], [ undef, [[BB62]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB64:%.*]], label [[FLOW2:%.*]]
+; CHECK:       bb64:
+; CHECK-NEXT:    call void asm sideeffect "s_nop 42", "~{memory}"() #0
+; CHECK-NEXT:    br label [[FLOW2]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP7]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[BB18]], label [[FLOW3]]
+; CHECK:       INCREMENT_I:
+; CHECK-NEXT:    [[INC_I]] = add i32 [[I]], 1
+; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1336
+; CHECK-NEXT:    br label [[FLOW1]]
+; CHECK:       END_ELSE_BLOCK:
+; CHECK-NEXT:    [[I_FINAL:%.*]] = phi i32 [ [[TMP1]], [[FLOW3]] ]
+; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1337
+; CHECK-NEXT:    [[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[I_FINAL]], -1
+; CHECK-NEXT:    br label [[FLOW4]]
+; CHECK:       Flow4:
+; CHECK-NEXT:    [[TMP10]] = phi i32 [ [[I_FINAL]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW3]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[RETURN:%.*]], label [[LOOP_HEADER]]
+; CHECK:       RETURN:
+; CHECK-NEXT:    call void asm sideeffect "s_nop 0x99
+; CHECK-NEXT:    store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+  %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
+  %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
+  br label %LOOP.HEADER
+
+LOOP.HEADER:
+  %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+  call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+  %tmp12 = zext i32 %i to i64
+  %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
+  %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
+  %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+  %tmp16 = and i32 %tmp15, 65535
+  %tmp17 = icmp eq i32 %tmp16, 1
+  br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+  %tmp19 = extractelement <2 x i32> %tmp, i64 0
+  %tmp22 = lshr i32 %tmp19, 16
+  %tmp24 = urem i32 %tmp22, 52
+  %tmp25 = mul nuw nsw i32 %tmp24, 52
+  br label %INNER_LOOP
+
+INNER_LOOP:
+  %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+  call void asm sideeffect "; inner loop body", ""() #0
+  %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+  %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+  br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+  %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+  call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+  br label %END_ELSE_BLOCK
+
+bb62:
+  %load13 = icmp ult i32 %tmp16, 271
+  br i1 %load13, label %bb64, label %INCREMENT_I
+
+bb64:
+  call void asm sideeffect "s_nop 42", "~{memory}"() #0
+  br label %RETURN
+
+INCREMENT_I:
+  %inc.i = add i32 %i, 1
+  call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+  br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+  %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+  call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+  %cmp.end.else.block = icmp eq i32 %i.final, -1
+  br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+  call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+  store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
+  ret void
+}
+
+; The same function, except break to return block goes directly to the
+; return, which managed to hide the bug.
+; FIXME: Merge variant from backedge-id-bug-xfail
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
index 58634d0d37db..7b5bd5acb629 100644
--- a/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -1,32 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
 
 define void @main(float addrspace(1)* %out) {
-
-; CHECK: main_body:
-; CHECK: br label %LOOP.outer
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    br label [[LOOP_OUTER:%.*]]
+; CHECK:       LOOP.outer:
+; CHECK-NEXT:    [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ]
+; CHECK-NEXT:    [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       LOOP:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ]
+; CHECK-NEXT:    [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[TMP22]], true
+; CHECK-NEXT:    br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
+; CHECK:       Flow2:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ]
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       Flow3:
+; CHECK-NEXT:    br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]]
+; CHECK:       ENDLOOP:
+; CHECK-NEXT:    [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3
+; CHECK-NEXT:    [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00
+; CHECK-NEXT:    store float [[DOT45]], float addrspace(1)* [[OUT:%.*]]
+; CHECK-NEXT:    ret void
+; CHECK:       ENDIF:
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TMP31]], true
+; CHECK-NEXT:    br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ]
+; CHECK-NEXT:    [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]]
+; CHECK:       IF29:
+; CHECK-NEXT:    [[TMP32]] = icmp sgt i32 [[TMP20]], 2
+; CHECK-NEXT:    br label [[FLOW2]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ]
+; CHECK-NEXT:    [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]]
+; CHECK:       ENDIF28:
+; CHECK-NEXT:    [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00
+; CHECK-NEXT:    [[TMP36]] = icmp sgt i32 [[TMP20]], 2
+; CHECK-NEXT:    br label [[FLOW1]]
+;
 main_body:
   br label %LOOP.outer
 
-; CHECK: LOOP.outer:
-; CHECK: br label %LOOP
 LOOP.outer:                                       ; preds = %ENDIF28, %main_body
   %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
   %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
   br label %LOOP
 
-; CHECK: LOOP:
-; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
 LOOP:                                             ; preds = %IF29, %LOOP.outer
   %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
   %tmp20 = add i32 %temp4.0, 1
   %tmp22 = icmp sgt i32 %tmp20, 3
   br i1 %tmp22, label %ENDLOOP, label %ENDIF
 
-; CHECK: Flow3
-; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
-
-; CHECK: ENDLOOP:
-; CHECK: ret void
 ENDLOOP:                                          ; preds = %ENDIF28, %IF29, %LOOP
   %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
   %tmp23 = icmp eq i32 %tmp20, 3
@@ -34,29 +78,14 @@ ENDLOOP:                                          ; preds = %ENDIF28, %IF29, %LO
   store float %.45, float addrspace(1)* %out
   ret void
 
-; CHECK: ENDIF:
-; CHECK: br i1 %tmp31, label %IF29, label %Flow1
 ENDIF:                                            ; preds = %LOOP
   %tmp31 = icmp sgt i32 %tmp20, 1
   br i1 %tmp31, label %IF29, label %ENDIF28
 
-; CHECK: Flow:
-; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
-
-; CHECK: IF29:
-; CHECK: br label %Flow1
 IF29:                                             ; preds = %ENDIF
   %tmp32 = icmp sgt i32 %tmp20, 2
   br i1 %tmp32, label %ENDLOOP, label %LOOP
 
-; CHECK: Flow1:
-; CHECK: br label %Flow
-
-; CHECK: Flow2:
-; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
-
-; CHECK: ENDIF28:
-; CHECK: br label %Flow3
 ENDIF28:                                          ; preds = %ENDIF
   %tmp35 = fadd float %temp8.0.ph, 1.0
   %tmp36 = icmp sgt i32 %tmp20, 2
diff --git a/test/tools/llvm-readobj/macho-needed-libs.test b/test/tools/llvm-readobj/macho-needed-libs.test
new file mode 100644
index 000000000000..22e6948e758f
--- /dev/null
+++ b/test/tools/llvm-readobj/macho-needed-libs.test
@@ -0,0 +1,26 @@
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-readobj -needed-libs %t.o | FileCheck %s
+
+# CHECK:      NeededLibraries [
+# CHECK-NEXT:   /usr/lib/libSystem.B.dylib
+# CHECK-NEXT: ]
+
+!mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           1
+  sizeofcmds:      56
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 81985536
+      compatibility_version: 65536
+    PayloadString:   /usr/lib/libSystem.B.dylib
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index 39e909279937..64178d7b33ad 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -39,6 +39,8 @@ class MachODumper : public ObjDumper {
   void printUnwindInfo() override;
   void printStackMap() const override;
 
+  void printNeededLibraries() override;
+
   // MachO-specific.
   void printMachODataInCode() override;
   void printMachOVersionMin() override;
@@ -675,6 +677,34 @@ void MachODumper::printStackMap() const {
                          StackMapV2Parser<support::big>(StackMapContentsArray));
 }
 
+void MachODumper::printNeededLibraries() {
+  ListScope D(W, "NeededLibraries");
+
+  using LibsTy = std::vector<StringRef>;
+  LibsTy Libs;
+
+  for (const auto &Command : Obj->load_commands()) {
+    if (Command.C.cmd == MachO::LC_LOAD_DYLIB ||
+        Command.C.cmd == MachO::LC_ID_DYLIB ||
+        Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
+        Command.C.cmd == MachO::LC_REEXPORT_DYLIB ||
+        Command.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
+        Command.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
+      MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command);
+      if (Dl.dylib.name < Dl.cmdsize) {
+        auto *P = static_cast<const char*>(Command.Ptr) + Dl.dylib.name;
+        Libs.push_back(P);
+      }
+    }
+  }
+
+  std::stable_sort(Libs.begin(), Libs.end());
+
+  for (const auto &L : Libs) {
+    outs() << "  " << L << "\n";
+  }
+}
+
 void MachODumper::printMachODataInCode() {
   for (const auto &Load : Obj->load_commands()) {
     if (Load.C.cmd  == MachO::LC_DATA_IN_CODE) {
diff --git a/unittests/IR/DominatorTreeBatchUpdatesTest.cpp b/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
index 4ad1f69030c1..e362afd84048 100644
--- a/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
+++ b/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
@@ -258,3 +258,98 @@ TEST(DominatorTreeBatchUpdates, InsertDeleteExhaustive) {
     EXPECT_TRUE(PDT.verify());
   }
 }
+
+// These are some odd flowgraphs, usually generated from csmith cases,
+// which are difficult on post dom trees.
+TEST(DominatorTreeBatchUpdates, InfiniteLoop) {
+  std::vector<CFGBuilder::Arc> Arcs = {
+      {"1", "2"},
+      {"2", "3"},
+      {"3", "6"}, {"3", "5"},
+      {"4", "5"},
+      {"5", "2"},
+      {"6", "3"}, {"6", "4"}};
+
+  // SplitBlock on 3 -> 5
+  std::vector<CFGBuilder::Update> Updates = {
+      {CFGInsert, {"N", "5"}},  {CFGInsert, {"3", "N"}}, {CFGDelete, {"3", "5"}}};
+
+  CFGHolder Holder;
+  CFGBuilder B(Holder.F, Arcs, Updates);
+  DominatorTree DT(*Holder.F);
+  EXPECT_TRUE(DT.verify());
+  PostDomTree PDT(*Holder.F);
+  EXPECT_TRUE(PDT.verify());
+
+  while (B.applyUpdate())
+    ;
+
+  auto DomUpdates = ToDomUpdates(B, Updates);
+  DT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(DT.verify());
+  PDT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(PDT.verify());
+}
+
+TEST(DominatorTreeBatchUpdates, DeadBlocks) {
+  std::vector<CFGBuilder::Arc> Arcs = {
+      {"1", "2"},
+      {"2", "3"},
+      {"3", "4"}, {"3", "7"},
+      {"4", "4"},
+      {"5", "6"}, {"5", "7"},
+      {"6", "7"},
+      {"7", "2"}, {"7", "8"}};
+
+  // Remove dead 5 and 7,
+  // plus SplitBlock on 7 -> 8
+  std::vector<CFGBuilder::Update> Updates = {
+      {CFGDelete, {"6", "7"}},  {CFGDelete, {"5", "7"}}, {CFGDelete, {"5", "6"}},
+      {CFGInsert, {"N", "8"}},  {CFGInsert, {"7", "N"}}, {CFGDelete, {"7", "8"}}};
+
+  CFGHolder Holder;
+  CFGBuilder B(Holder.F, Arcs, Updates);
+  DominatorTree DT(*Holder.F);
+  EXPECT_TRUE(DT.verify());
+  PostDomTree PDT(*Holder.F);
+  EXPECT_TRUE(PDT.verify());
+
+  while (B.applyUpdate())
+    ;
+
+  auto DomUpdates = ToDomUpdates(B, Updates);
+  DT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(DT.verify());
+  PDT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(PDT.verify());
+}
+
+TEST(DominatorTreeBatchUpdates, InfiniteLoop2) {
+  std::vector<CFGBuilder::Arc> Arcs = {
+      {"1", "2"},
+      {"2", "6"}, {"2", "3"},
+      {"3", "4"},
+      {"4", "5"}, {"4", "6"},
+      {"5", "4"},
+      {"6", "2"}};
+
+  // SplitBlock on 4 -> 6
+  std::vector<CFGBuilder::Update> Updates = {
+      {CFGInsert, {"N", "6"}},  {CFGInsert, {"4", "N"}}, {CFGDelete, {"4", "6"}}};
+
+  CFGHolder Holder;
+  CFGBuilder B(Holder.F, Arcs, Updates);
+  DominatorTree DT(*Holder.F);
+  EXPECT_TRUE(DT.verify());
+  PostDomTree PDT(*Holder.F);
+  EXPECT_TRUE(PDT.verify());
+
+  while (B.applyUpdate())
+    ;
+
+  auto DomUpdates = ToDomUpdates(B, Updates);
+  DT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(DT.verify());
+  PDT.applyUpdates(DomUpdates);
+  EXPECT_TRUE(PDT.verify());
+}
diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp
index bf5aced49289..4666f93da2d9 100644
--- a/unittests/IR/DominatorTreeTest.cpp
+++ b/unittests/IR/DominatorTreeTest.cpp
@@ -925,3 +925,28 @@ TEST(DominatorTree, InsertDeleteExhaustive) {
     }
   }
 }
+
+TEST(DominatorTree, InsertIntoIrreducible) {
+  std::vector<CFGBuilder::Arc> Arcs = {
+      {"0", "1"},
+      {"1", "27"}, {"1", "7"},
+      {"10", "18"},
+      {"13", "10"},
+      {"18", "13"}, {"18", "23"},
+      {"23", "13"}, {"23", "24"},
+      {"24", "1"}, {"24", "18"},
+      {"27", "24"}};
+
+  CFGHolder Holder;
+  CFGBuilder B(Holder.F, Arcs, {{Insert, {"7", "23"}}});
+  DominatorTree DT(*Holder.F);
+  EXPECT_TRUE(DT.verify());
+
+  B.applyUpdate();
+  BasicBlock *From = B.getOrAddBlock("7");
+  BasicBlock *To = B.getOrAddBlock("23");
+  DT.insertEdge(From, To);
+
+  EXPECT_TRUE(DT.verify());
+}
+
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 66a2c578083e..440dee53c1b7 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -33,6 +33,7 @@ do_asserts="no"
 do_compare="yes"
 do_rt="yes"
 do_libs="yes"
+do_libcxxabi="yes"
 do_libunwind="yes"
 do_test_suite="yes"
 do_openmp="yes"
@@ -62,6 +63,7 @@ function usage() {
     echo "                      For example -svn-path trunk or -svn-path branches/release_37"
     echo " -no-rt               Disable check-out & build Compiler-RT"
     echo " -no-libs             Disable check-out & build libcxx/libcxxabi/libunwind"
+    echo " -no-libcxxabi        Disable check-out & build libcxxabi"
     echo " -no-libunwind        Disable check-out & build libunwind"
     echo " -no-test-suite       Disable check-out & build test-suite"
     echo " -no-openmp           Disable check-out & build libomp"
@@ -135,6 +137,9 @@ while [ $# -gt 0 ]; do
         -no-libs )
             do_libs="no"
             ;;
+        -no-libcxxabi )
+            do_libcxxabi="no"
+            ;;
         -no-libunwind )
             do_libunwind="no"
             ;;
@@ -206,7 +211,10 @@ if [ $do_rt = "yes" ]; then
   projects="$projects compiler-rt"
 fi
 if [ $do_libs = "yes" ]; then
-  projects="$projects libcxx libcxxabi"
+  projects="$projects libcxx"
+  if [ $do_libcxxabi = "yes" ]; then
+    projects="$projects libcxxabi"
+  fi
   if [ $do_libunwind = "yes" ]; then
     projects="$projects libunwind"
   fi

From 520a89e9d38bb1c9cc2de3f875eae3ac69f9f08a Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:25:37 +0000
Subject: [PATCH 2/6] Vendor import of clang release_60 branch r323338:
 https://llvm.org/svn/llvm-project/cfe/branches/release_60@323338

---
 docs/OpenMPSupport.rst                        |  68 ++++++++++
 docs/ReleaseNotes.rst                         |  18 ++-
 docs/index.rst                                |   1 +
 include/clang/Basic/Attr.td                   |   1 +
 include/clang/Basic/BuiltinsX86.def           |  18 +--
 include/clang/Basic/DiagnosticGroups.td       |   4 +-
 include/clang/Basic/TokenKinds.def            |   1 -
 .../StaticAnalyzer/Core/BugReporter/BugType.h |  38 ++++--
 lib/AST/DeclBase.cpp                          |   2 +
 lib/AST/ODRHash.cpp                           |   2 +
 lib/CodeGen/CGBuiltin.cpp                     |   6 +-
 .../ObjectFilePCHContainerOperations.cpp      |   5 +
 lib/Frontend/InitPreprocessor.cpp             |   4 -
 lib/Lex/Lexer.cpp                             |  19 +--
 lib/Lex/PPCaching.cpp                         |   4 +-
 lib/Lex/PPLexerChange.cpp                     |   1 +
 lib/Sema/Scope.cpp                            |  99 ++++++---------
 lib/Sema/SemaTemplateDeduction.cpp            |   4 +
 lib/Sema/SemaTemplateInstantiateDecl.cpp      |   3 +-
 lib/StaticAnalyzer/Checkers/MallocChecker.cpp |   7 +-
 lib/StaticAnalyzer/Checkers/ValistChecker.cpp |  18 +--
 test/Analysis/malloc.c                        |   7 --
 test/CodeCompletion/Inputs/comments.h         |   4 +
 test/CodeCompletion/comments.cpp              |  13 ++
 test/CodeGen/builtins-overflow.c              |   8 +-
 test/CodeGenCXX/cxx1z-inline-variables.cpp    |   8 ++
 test/Lexer/null-character-in-literal.c        | Bin 0 -> 917 bytes
 test/Modules/ExtDebugInfo.cpp                 |   2 +-
 test/Modules/Inputs/DebugCXX.h                |   3 +
 test/Modules/Inputs/odr_hash-Friend/Box.h     |  14 +++
 test/Modules/Inputs/odr_hash-Friend/M1.h      |   6 +
 test/Modules/Inputs/odr_hash-Friend/M2.h      |   5 +
 test/Modules/Inputs/odr_hash-Friend/M3.h      |   7 ++
 .../Inputs/odr_hash-Friend/module.modulemap   |  15 +++
 test/Modules/ModuleDebugInfo.cpp              |  17 ++-
 test/Modules/odr_hash-Friend.cpp              |  22 ++++
 test/Modules/odr_hash-blocks.cpp              | 119 ++++++++++++++++++
 test/Preprocessor/cuda-types.cu               |  20 +--
 test/Sema/_Float128.c                         |  22 ----
 test/Sema/tautological-constant-compare.c     |   4 +-
 ...xx1z-class-template-argument-deduction.cpp |  11 ++
 test/SemaTemplate/alignas.cpp                 |  11 ++
 test/SemaTemplate/cxx17-inline-variables.cpp  |  11 ++
 unittests/Lex/LexerTest.cpp                   |   2 +
 44 files changed, 494 insertions(+), 160 deletions(-)
 create mode 100644 docs/OpenMPSupport.rst
 create mode 100644 test/CodeCompletion/Inputs/comments.h
 create mode 100644 test/CodeCompletion/comments.cpp
 create mode 100644 test/Lexer/null-character-in-literal.c
 create mode 100644 test/Modules/Inputs/odr_hash-Friend/Box.h
 create mode 100644 test/Modules/Inputs/odr_hash-Friend/M1.h
 create mode 100644 test/Modules/Inputs/odr_hash-Friend/M2.h
 create mode 100644 test/Modules/Inputs/odr_hash-Friend/M3.h
 create mode 100644 test/Modules/Inputs/odr_hash-Friend/module.modulemap
 create mode 100644 test/Modules/odr_hash-Friend.cpp
 create mode 100644 test/Modules/odr_hash-blocks.cpp
 delete mode 100644 test/Sema/_Float128.c

diff --git a/docs/OpenMPSupport.rst b/docs/OpenMPSupport.rst
new file mode 100644
index 000000000000..c121df3e2010
--- /dev/null
+++ b/docs/OpenMPSupport.rst
@@ -0,0 +1,68 @@
+.. raw:: html
+
+  <style type="text/css">
+    .none { background-color: #FFCCCC }
+    .partial { background-color: #FFFF99 }
+    .good { background-color: #CCFF99 }
+  </style>
+
+.. role:: none
+.. role:: partial
+.. role:: good
+
+==================
+OpenMP Support
+==================
+
+Clang fully supports OpenMP 3.1 + some elements of OpenMP 4.5. Clang supports offloading to X86_64, AArch64 and PPC64[LE] devices.
+Support for Cuda devices is not ready yet.
+The status of major OpenMP 4.5 features support in Clang.
+
+Standalone directives
+=====================
+
+* #pragma omp [for] simd: :good:`Complete`.
+
+* #pragma omp declare simd: :partial:`Partial`.  We support parsing/semantic
+  analysis + generation of special attributes for X86 target, but still
+  missing the LLVM pass for vectorization.
+
+* #pragma omp taskloop [simd]: :good:`Complete`.
+
+* #pragma omp target [enter|exit] data: :good:`Complete`.
+
+* #pragma omp target update: :good:`Complete`.
+
+* #pragma omp target: :partial:`Partial`.  No support for the `depend` clauses.
+
+* #pragma omp declare target: :partial:`Partial`.  No full codegen support.
+
+* #pragma omp teams: :good:`Complete`.
+
+* #pragma omp distribute [simd]: :good:`Complete`.
+
+* #pragma omp distribute parallel for [simd]: :good:`Complete`.
+
+Combined directives
+===================
+
+* #pragma omp parallel for simd: :good:`Complete`.
+
+* #pragma omp target parallel: :partial:`Partial`.  No support for the `depend` clauses.
+
+* #pragma omp target parallel for [simd]: :partial:`Partial`.  No support for the `depend` clauses.
+
+* #pragma omp target simd: :partial:`Partial`.  No support for the `depend` clauses.
+
+* #pragma omp target teams: :partial:`Partial`.  No support for the `depend` clauses.
+
+* #pragma omp teams distribute [simd]: :good:`Complete`.
+
+* #pragma omp target teams distribute [simd]: :partial:`Partial`.  No support for the and `depend` clauses.
+
+* #pragma omp teams distribute parallel for [simd]: :good:`Complete`.
+
+* #pragma omp target teams distribute parallel for [simd]: :partial:`Partial`.  No full codegen support.
+
+Clang does not support any constructs/updates from upcoming OpenMP 5.0 except for `reduction`-based clauses in the `task` and `target`-based directives.
+In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and mac OS.
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 92aa01c4b1ef..6857903c96d9 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -163,6 +163,15 @@ Attribute Changes in Clang
 - The presence of __attribute__((availability(...))) on a declaration no longer
   implies default visibility for that declaration on macOS.
 
+- Clang now supports configuration files. These are collections of driver
+  options, which can be applied by specifying the configuration file, either
+  using command line option `--config foo.cfg` or encoding it into executable
+  name `foo-clang`. Clang behaves as if the options from this file were inserted
+  before the options specified in command line. This feature is primary intended
+  to facilitate cross compilation. Details can be found in
+  `Clang Compiler User's Manual
+  <http://clang.llvm.org/docs/UsersManual.html#configuration-files>`.
+
 - ...
 
 Windows Support
@@ -209,7 +218,7 @@ OpenCL C Language Changes in Clang
 OpenMP Support in Clang
 ----------------------------------
 
-- Added options `-f[no]-openmp-simd` that support code emission only foe OpenMP
+- Added options `-f[no]-openmp-simd` that support code emission only for OpenMP
   SIMD-based directives, like `#pragma omp simd`, `#pragma omp parallel for simd`
   etc. The code is emitted only for simd-based part of the combined directives
   and clauses.
@@ -222,6 +231,13 @@ OpenMP Support in Clang
 - Added support for `reduction`-based clauses on `task`-based directives from
   upcoming OpenMP 5.0.
 
+- The LLVM OpenMP runtime `libomp` now supports the OpenMP Tools Interface (OMPT)
+  on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. If you observe
+  a measurable performance impact on one of your applications without a tool
+  attached, please rebuild the runtime library with `-DLIBOMP_OMPT_SUPPORT=OFF` and
+  file a bug at `LLVM's Bugzilla <https://bugs.llvm.org/>`_ or send a message to the
+  `OpenMP development list <http://lists.llvm.org/cgi-bin/mailman/listinfo/openmp-dev>`_.
+
 Internal API Changes
 --------------------
 
diff --git a/docs/index.rst b/docs/index.rst
index 342ab74d2d80..ed479534ee97 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -39,6 +39,7 @@ Using Clang as a Compiler
    SourceBasedCodeCoverage
    Modules
    MSVCCompatibility
+   OpenMPSupport
    ThinLTO
    CommandGuide/index
    FAQ
diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
index 8b84c4b8b50d..59f595e03c34 100644
--- a/include/clang/Basic/Attr.td
+++ b/include/clang/Basic/Attr.td
@@ -549,6 +549,7 @@ def Aligned : InheritableAttr {
                                           Keyword<"_Alignas">]>,
                    Accessor<"isDeclspec",[Declspec<"align">]>];
   let Documentation = [Undocumented];
+  let DuplicatesAllowedWhileMerging = 1;
 }
 
 def AlignValue : Attr {
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 465551be7742..9169b7c22920 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1357,15 +1357,15 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
 
-TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2")
 
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
diff --git a/include/clang/Basic/DiagnosticGroups.td b/include/clang/Basic/DiagnosticGroups.td
index 79b2766ae16f..b54f07508867 100644
--- a/include/clang/Basic/DiagnosticGroups.td
+++ b/include/clang/Basic/DiagnosticGroups.td
@@ -444,8 +444,7 @@ def TautologicalInRangeCompare : DiagGroup<"tautological-constant-in-range-compa
                                             TautologicalUnsignedEnumZeroCompare]>;
 def TautologicalOutOfRangeCompare : DiagGroup<"tautological-constant-out-of-range-compare">;
 def TautologicalConstantCompare : DiagGroup<"tautological-constant-compare",
-                                            [TautologicalInRangeCompare,
-                                             TautologicalOutOfRangeCompare]>;
+                                            [TautologicalOutOfRangeCompare]>;
 def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">;
 def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">;
 def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">;
@@ -719,7 +718,6 @@ def IntToPointerCast : DiagGroup<"int-to-pointer-cast",
 def Move : DiagGroup<"move", [PessimizingMove, RedundantMove, SelfMove]>;
 
 def Extra : DiagGroup<"extra", [
-    TautologicalInRangeCompare,
     MissingFieldInitializers,
     IgnoredQualifiers,
     InitializerOverrides,
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index 6ae8821a834d..91c13244f9a0 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -398,7 +398,6 @@ TYPE_TRAIT_2(__builtin_types_compatible_p, TypeCompatible, KEYNOCXX)
 KEYWORD(__builtin_va_arg            , KEYALL)
 KEYWORD(__extension__               , KEYALL)
 KEYWORD(__float128                  , KEYALL)
-ALIAS("_Float128", __float128       , KEYNOCXX)
 KEYWORD(__imag                      , KEYALL)
 KEYWORD(__int128                    , KEYALL)
 KEYWORD(__label__                   , KEYALL)
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
index 18fa85c9657f..d3163ef3e576 100644
--- a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
@@ -32,27 +32,39 @@ class BugType {
   const CheckName Check;
   const std::string Name;
   const std::string Category;
-  bool SuppressonSink;
+  const CheckerBase *Checker;
+  bool SuppressOnSink;
 
   virtual void anchor();
-public:
-  BugType(class CheckName check, StringRef name, StringRef cat)
-      : Check(check), Name(name), Category(cat), SuppressonSink(false) {}
-  BugType(const CheckerBase *checker, StringRef name, StringRef cat)
-      : Check(checker->getCheckName()), Name(name), Category(cat),
-        SuppressonSink(false) {}
-  virtual ~BugType() {}
 
-  // FIXME: Should these be made strings as well?
+public:
+  BugType(CheckName Check, StringRef Name, StringRef Cat)
+      : Check(Check), Name(Name), Category(Cat), Checker(nullptr),
+        SuppressOnSink(false) {}
+  BugType(const CheckerBase *Checker, StringRef Name, StringRef Cat)
+      : Check(Checker->getCheckName()), Name(Name), Category(Cat),
+        Checker(Checker), SuppressOnSink(false) {}
+  virtual ~BugType() = default;
+
   StringRef getName() const { return Name; }
   StringRef getCategory() const { return Category; }
-  StringRef getCheckName() const { return Check.getName(); }
+  StringRef getCheckName() const {
+    // FIXME: This is a workaround to ensure that the correct check name is used
+    // The check names are set after the constructors are run.
+    // In case the BugType object is initialized in the checker's ctor
+    // the Check field will be empty. To circumvent this problem we use 
+    // CheckerBase whenever it is possible.
+    StringRef CheckName =
+        Checker ? Checker->getCheckName().getName() : Check.getName();
+    assert(!CheckName.empty() && "Check name is not set properly.");
+    return CheckName;
+  }
 
   /// isSuppressOnSink - Returns true if bug reports associated with this bug
   ///  type should be suppressed if the end node of the report is post-dominated
   ///  by a sink node.
-  bool isSuppressOnSink() const { return SuppressonSink; }
-  void setSuppressOnSink(bool x) { SuppressonSink = x; }
+  bool isSuppressOnSink() const { return SuppressOnSink; }
+  void setSuppressOnSink(bool x) { SuppressOnSink = x; }
 
   virtual void FlushReports(BugReporter& BR);
 };
@@ -74,7 +86,7 @@ class BuiltinBug : public BugType {
   StringRef getDescription() const { return desc; }
 };
 
-} // end GR namespace
+} // end ento namespace
 
 } // end clang namespace
 #endif
diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp
index 29ce7ae034b5..2cdcdae9ab02 100644
--- a/lib/AST/DeclBase.cpp
+++ b/lib/AST/DeclBase.cpp
@@ -891,12 +891,14 @@ bool Decl::AccessDeclContextSanity() const {
   // 4. the context is not a record
   // 5. it's invalid
   // 6. it's a C++0x static_assert.
+  // 7. it's a block literal declaration
   if (isa<TranslationUnitDecl>(this) ||
       isa<TemplateTypeParmDecl>(this) ||
       isa<NonTypeTemplateParmDecl>(this) ||
       !isa<CXXRecordDecl>(getDeclContext()) ||
       isInvalidDecl() ||
       isa<StaticAssertDecl>(this) ||
+      isa<BlockDecl>(this) ||
       // FIXME: a ParmVarDecl can have ClassTemplateSpecialization
       // as DeclContext (?).
       isa<ParmVarDecl>(this) ||
diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp
index 088d8bedd453..38e8d34135f9 100644
--- a/lib/AST/ODRHash.cpp
+++ b/lib/AST/ODRHash.cpp
@@ -478,6 +478,8 @@ void ODRHash::AddFunctionDecl(const FunctionDecl *Function) {
 
   // TODO: Fix hashing for class methods.
   if (isa<CXXMethodDecl>(Function)) return;
+  // And friend functions.
+  if (Function->getFriendObjectKind()) return;
 
   // Skip functions that are specializations or in specialization context.
   const DeclContext *DC = Function;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index ba54f8342f1b..35ae114c4f25 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -915,7 +915,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
       Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
     }
 
-    Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
+    // Negate the product if it would be negative in infinite precision.
+    Result = CGF.Builder.CreateSelect(
+        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
+
+    Result = CGF.Builder.CreateTrunc(Result, ResTy);
   }
   assert(Overflow && Result && "Missing overflow or result");
 
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index d0760b9cc2a6..0fe9f5da07c8 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -229,6 +229,11 @@ class PCHContainerGenerator : public ASTConsumer {
       Builder->getModuleDebugInfo()->completeRequiredType(RD);
   }
 
+  void HandleImplicitImportDecl(ImportDecl *D) override {
+    if (!D->getImportedOwningModule())
+      Builder->getModuleDebugInfo()->EmitImportDecl(*D);
+  }
+
   /// Emit a container holding the serialized AST.
   void HandleTranslationUnit(ASTContext &Ctx) override {
     assert(M && VMContext && Builder);
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index 639050f7c64b..321d963827d1 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -817,10 +817,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   DefineFloatMacros(Builder, "FLT", &TI.getFloatFormat(), "F");
   DefineFloatMacros(Builder, "DBL", &TI.getDoubleFormat(), "");
   DefineFloatMacros(Builder, "LDBL", &TI.getLongDoubleFormat(), "L");
-  if (TI.hasFloat128Type())
-    // FIXME: Switch away from the non-standard "Q" when we can
-    DefineFloatMacros(Builder, "FLT128", &TI.getFloat128Format(), "Q");
-
 
   // Define a __POINTER_WIDTH__ macro for stdint.h.
   Builder.defineMacro("__POINTER_WIDTH__",
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 830354ab23f0..8bd4ab0ff9ca 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2009,18 +2009,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
   const char *AfterLessPos = CurPtr;
   char C = getAndAdvanceChar(CurPtr, Result);
   while (C != '>') {
-    // Skip escaped characters.
-    if (C == '\\' && CurPtr < BufferEnd) {
-      // Skip the escaped character.
-      getAndAdvanceChar(CurPtr, Result);
-    } else if (C == '\n' || C == '\r' ||             // Newline.
-               (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
-                           isCodeCompletionPoint(CurPtr-1)))) {
+    // Skip escaped characters.  Escaped newlines will already be processed by
+    // getAndAdvanceChar.
+    if (C == '\\')
+      C = getAndAdvanceChar(CurPtr, Result);
+
+    if (C == '\n' || C == '\r' ||             // Newline.
+        (C == 0 && (CurPtr-1 == BufferEnd ||  // End of file.
+                    isCodeCompletionPoint(CurPtr-1)))) {
       // If the filename is unterminated, then it must just be a lone <
       // character.  Return this as such.
       FormTokenWithChars(Result, AfterLessPos, tok::less);
       return true;
-    } else if (C == 0) {
+    }
+
+    if (C == 0) {
       NulCharacter = CurPtr-1;
     }
     C = getAndAdvanceChar(CurPtr, Result);
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index f5e8cdc25d38..9758557d7b44 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -105,8 +105,10 @@ void Preprocessor::CachingLex(Token &Result) {
 }
 
 void Preprocessor::EnterCachingLexMode() {
-  if (InCachingLexMode())
+  if (InCachingLexMode()) {
+    assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
     return;
+  }
 
   PushIncludeMacroStack();
   CurLexerKind = CLK_CachingLexer;
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index e484e9c4c3a3..f21787338b37 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -444,6 +444,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
       }
 
       CurPPLexer = nullptr;
+      recomputeCurLexerKind();
       return true;
     }
 
diff --git a/lib/Sema/Scope.cpp b/lib/Sema/Scope.cpp
index ae5b181c6728..eae5a328bfa2 100644
--- a/lib/Sema/Scope.cpp
+++ b/lib/Sema/Scope.cpp
@@ -143,72 +143,43 @@ void Scope::dumpImpl(raw_ostream &OS) const {
   if (HasFlags)
     OS << "Flags: ";
 
-  while (Flags) {
-    if (Flags & FnScope) {
-      OS << "FnScope";
-      Flags &= ~FnScope;
-    } else if (Flags & BreakScope) {
-      OS << "BreakScope";
-      Flags &= ~BreakScope;
-    } else if (Flags & ContinueScope) {
-      OS << "ContinueScope";
-      Flags &= ~ContinueScope;
-    } else if (Flags & DeclScope) {
-      OS << "DeclScope";
-      Flags &= ~DeclScope;
-    } else if (Flags & ControlScope) {
-      OS << "ControlScope";
-      Flags &= ~ControlScope;
-    } else if (Flags & ClassScope) {
-      OS << "ClassScope";
-      Flags &= ~ClassScope;
-    } else if (Flags & BlockScope) {
-      OS << "BlockScope";
-      Flags &= ~BlockScope;
-    } else if (Flags & TemplateParamScope) {
-      OS << "TemplateParamScope";
-      Flags &= ~TemplateParamScope;
-    } else if (Flags & FunctionPrototypeScope) {
-      OS << "FunctionPrototypeScope";
-      Flags &= ~FunctionPrototypeScope;
-    } else if (Flags & FunctionDeclarationScope) {
-      OS << "FunctionDeclarationScope";
-      Flags &= ~FunctionDeclarationScope;
-    } else if (Flags & AtCatchScope) {
-      OS << "AtCatchScope";
-      Flags &= ~AtCatchScope;
-    } else if (Flags & ObjCMethodScope) {
-      OS << "ObjCMethodScope";
-      Flags &= ~ObjCMethodScope;
-    } else if (Flags & SwitchScope) {
-      OS << "SwitchScope";
-      Flags &= ~SwitchScope;
-    } else if (Flags & TryScope) {
-      OS << "TryScope";
-      Flags &= ~TryScope;
-    } else if (Flags & FnTryCatchScope) {
-      OS << "FnTryCatchScope";
-      Flags &= ~FnTryCatchScope;
-    } else if (Flags & SEHTryScope) {
-      OS << "SEHTryScope";
-      Flags &= ~SEHTryScope;
-    } else if (Flags & SEHExceptScope) {
-      OS << "SEHExceptScope";
-      Flags &= ~SEHExceptScope;
-    } else if (Flags & OpenMPDirectiveScope) {
-      OS << "OpenMPDirectiveScope";
-      Flags &= ~OpenMPDirectiveScope;
-    } else if (Flags & OpenMPLoopDirectiveScope) {
-      OS << "OpenMPLoopDirectiveScope";
-      Flags &= ~OpenMPLoopDirectiveScope;
-    } else if (Flags & OpenMPSimdDirectiveScope) {
-      OS << "OpenMPSimdDirectiveScope";
-      Flags &= ~OpenMPSimdDirectiveScope;
-    }
+  std::pair<unsigned, const char *> FlagInfo[] = {
+      {FnScope, "FnScope"},
+      {BreakScope, "BreakScope"},
+      {ContinueScope, "ContinueScope"},
+      {DeclScope, "DeclScope"},
+      {ControlScope, "ControlScope"},
+      {ClassScope, "ClassScope"},
+      {BlockScope, "BlockScope"},
+      {TemplateParamScope, "TemplateParamScope"},
+      {FunctionPrototypeScope, "FunctionPrototypeScope"},
+      {FunctionDeclarationScope, "FunctionDeclarationScope"},
+      {AtCatchScope, "AtCatchScope"},
+      {ObjCMethodScope, "ObjCMethodScope"},
+      {SwitchScope, "SwitchScope"},
+      {TryScope, "TryScope"},
+      {FnTryCatchScope, "FnTryCatchScope"},
+      {OpenMPDirectiveScope, "OpenMPDirectiveScope"},
+      {OpenMPLoopDirectiveScope, "OpenMPLoopDirectiveScope"},
+      {OpenMPSimdDirectiveScope, "OpenMPSimdDirectiveScope"},
+      {EnumScope, "EnumScope"},
+      {SEHTryScope, "SEHTryScope"},
+      {SEHExceptScope, "SEHExceptScope"},
+      {SEHFilterScope, "SEHFilterScope"},
+      {CompoundStmtScope, "CompoundStmtScope"},
+      {ClassInheritanceScope, "ClassInheritanceScope"}};
 
-    if (Flags)
-      OS << " | ";
+  for (auto Info : FlagInfo) {
+    if (Flags & Info.first) {
+      OS << Info.second;
+      Flags &= ~Info.first;
+      if (Flags)
+        OS << " | ";
+    }
   }
+
+  assert(Flags == 0 && "Unknown scope flags");
+
   if (HasFlags)
     OS << '\n';
 
diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp
index 3a0f2ff0004b..d09cf9933ecf 100644
--- a/lib/Sema/SemaTemplateDeduction.cpp
+++ b/lib/Sema/SemaTemplateDeduction.cpp
@@ -502,6 +502,10 @@ DeduceTemplateArguments(Sema &S,
                         SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
   assert(Arg.isCanonical() && "Argument type must be canonical");
 
+  // Treat an injected-class-name as its underlying template-id.
+  if (auto *Injected = dyn_cast<InjectedClassNameType>(Arg))
+    Arg = Injected->getInjectedSpecializationType();
+
   // Check whether the template argument is a dependent template-id.
   if (const TemplateSpecializationType *SpecArg
         = dyn_cast<TemplateSpecializationType>(Arg)) {
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index ab68e7e671de..9163fbc6f7e8 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -4160,7 +4160,8 @@ void Sema::BuildVariableInstantiation(
   // it right away if the type contains 'auto'.
   if ((!isa<VarTemplateSpecializationDecl>(NewVar) &&
        !InstantiatingVarTemplate &&
-       !(OldVar->isInline() && OldVar->isThisDeclarationADefinition())) ||
+       !(OldVar->isInline() && OldVar->isThisDeclarationADefinition() &&
+         !NewVar->isThisDeclarationADefinition())) ||
       NewVar->getType()->isUndeducedType())
     InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
 
diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 851114004b96..904c9ffa37df 100644
--- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -2900,8 +2900,13 @@ void ento::registerNewDeleteLeaksChecker(CheckerManager &mgr) {
       mgr.getCurrentCheckName();
   // We currently treat NewDeleteLeaks checker as a subchecker of NewDelete
   // checker.
-  if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker])
+  if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker]) {
     checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker] = true;
+    // FIXME: This does not set the correct name, but without this workaround
+    //        no name will be set at all.
+    checker->CheckNames[MallocChecker::CK_NewDeleteChecker] =
+        mgr.getCurrentCheckName();
+  }
 }
 
 #define REGISTER_CHECKER(name)                                                 \
diff --git a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
index 06c4ef71d80b..1ebac2118a42 100644
--- a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
@@ -64,7 +64,7 @@ class ValistChecker : public Checker<check::PreCall, check::PreStmt<VAArgExpr>,
                                  CheckerContext &C) const;
   void reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1,
                            StringRef Msg2, CheckerContext &C, ExplodedNode *N,
-                           bool ForceReport = false) const;
+                           bool ReportUninit = false) const;
 
   void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
                             bool IsCopy) const;
@@ -267,15 +267,19 @@ void ValistChecker::reportUninitializedAccess(const MemRegion *VAList,
 void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists,
                                         StringRef Msg1, StringRef Msg2,
                                         CheckerContext &C, ExplodedNode *N,
-                                        bool ForceReport) const {
+                                        bool ReportUninit) const {
   if (!(ChecksEnabled[CK_Unterminated] ||
-        (ChecksEnabled[CK_Uninitialized] && ForceReport)))
+        (ChecksEnabled[CK_Uninitialized] && ReportUninit)))
     return;
   for (auto Reg : LeakedVALists) {
     if (!BT_leakedvalist) {
-      BT_leakedvalist.reset(new BugType(CheckNames[CK_Unterminated],
-                                        "Leaked va_list",
-                                        categories::MemoryError));
+      // FIXME: maybe creating a new check name for this type of bug is a better
+      // solution.
+      BT_leakedvalist.reset(
+          new BugType(CheckNames[CK_Unterminated].getName().empty()
+                          ? CheckNames[CK_Uninitialized]
+                          : CheckNames[CK_Unterminated],
+                      "Leaked va_list", categories::MemoryError));
       BT_leakedvalist->setSuppressOnSink(true);
     }
 
@@ -375,7 +379,7 @@ void ValistChecker::checkVAListEndCall(const CallEvent &Call,
 
 std::shared_ptr<PathDiagnosticPiece> ValistChecker::ValistBugVisitor::VisitNode(
     const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
-    BugReport &BR) {
+    BugReport &) {
   ProgramStateRef State = N->getState();
   ProgramStateRef StatePrev = PrevN->getState();
 
diff --git a/test/Analysis/malloc.c b/test/Analysis/malloc.c
index 4c364ebd9a2f..e08ec1b76cff 100644
--- a/test/Analysis/malloc.c
+++ b/test/Analysis/malloc.c
@@ -1720,13 +1720,6 @@ void *smallocWarn(size_t size) {
   }
 }
 
-char *dupstrWarn(const char *s) {
-  const int len = strlen(s);
-  char *p = (char*) smallocWarn(len + 1);
-  strcpy(p, s); // expected-warning{{String copy function overflows destination buffer}}
-  return p;
-}
-
 int *radar15580979() {
   int *data = (int *)malloc(32);
   int *p = data ?: (int*)malloc(32); // no warning
diff --git a/test/CodeCompletion/Inputs/comments.h b/test/CodeCompletion/Inputs/comments.h
new file mode 100644
index 000000000000..7b4b5daa0bd5
--- /dev/null
+++ b/test/CodeCompletion/Inputs/comments.h
@@ -0,0 +1,4 @@
+// PR32732
+struct B {
+  // <- code completion
+};
diff --git a/test/CodeCompletion/comments.cpp b/test/CodeCompletion/comments.cpp
new file mode 100644
index 000000000000..21f1465ebc04
--- /dev/null
+++ b/test/CodeCompletion/comments.cpp
@@ -0,0 +1,13 @@
+// Note: the run lines follow their respective tests, since line/column
+// matter in this test.
+
+#include "comments.h"
+
+struct A {
+  // <- code completion
+  /* <- code completion */
+};
+
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:7:6 %s
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:8:6 %s
+// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%S/Inputs/comments.h:3:6 %s
diff --git a/test/CodeGen/builtins-overflow.c b/test/CodeGen/builtins-overflow.c
index 7a30cfbd46ee..f83bbfb9672d 100644
--- a/test/CodeGen/builtins-overflow.c
+++ b/test/CodeGen/builtins-overflow.c
@@ -373,7 +373,9 @@ int test_mixed_sign_mull_overflow_unsigned(int x, unsigned y) {
 // CHECK-NEXT:  [[NotNull:%.*]] = icmp ne i32 [[UnsignedResult]], 0
 // CHECK-NEXT:  [[Underflow:%.*]] = and i1 [[IsNeg]], [[NotNull]]
 // CHECK-NEXT:  [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[Underflow]]
-// CHECK-NEXT:  store i32 [[UnsignedResult]], i32* %{{.*}}, align 4
+// CHECK-NEXT:  [[NegatedResult:%.*]] = sub i32 0, [[UnsignedResult]]
+// CHECK-NEXT:  [[Result:%.*]] = select i1 [[IsNeg]], i32 [[NegatedResult]], i32 [[UnsignedResult]]
+// CHECK-NEXT:  store i32 [[Result]], i32* %{{.*}}, align 4
 // CHECK:       br i1 [[OFlow]]
 
   unsigned result;
@@ -432,7 +434,9 @@ long long test_mixed_sign_mulll_overflow_trunc_unsigned(long long x, unsigned lo
 // CHECK-NEXT:  [[OVERFLOW_PRE_TRUNC:%.*]] = or i1 {{.*}}, [[UNDERFLOW]]
 // CHECK-NEXT:  [[TRUNC_OVERFLOW:%.*]] = icmp ugt i64 [[UNSIGNED_RESULT]], 4294967295
 // CHECK-NEXT:  [[OVERFLOW:%.*]] = or i1 [[OVERFLOW_PRE_TRUNC]], [[TRUNC_OVERFLOW]]
-// CHECK-NEXT:  trunc i64 [[UNSIGNED_RESULT]] to i32
+// CHECK-NEXT:  [[NEGATED:%.*]] = sub i64 0, [[UNSIGNED_RESULT]]
+// CHECK-NEXT:  [[RESULT:%.*]] = select i1 {{.*}}, i64 [[NEGATED]], i64 [[UNSIGNED_RESULT]]
+// CHECK-NEXT:  trunc i64 [[RESULT]] to i32
 // CHECK-NEXT:  store
   unsigned result;
   if (__builtin_mul_overflow(y, x, &result))
diff --git a/test/CodeGenCXX/cxx1z-inline-variables.cpp b/test/CodeGenCXX/cxx1z-inline-variables.cpp
index 2d16acd8a8c2..50eab3b70611 100644
--- a/test/CodeGenCXX/cxx1z-inline-variables.cpp
+++ b/test/CodeGenCXX/cxx1z-inline-variables.cpp
@@ -58,14 +58,22 @@ template<typename T> struct X {
   static int a;
   static inline int b;
   static int c;
+  static const int d;
+  static int e;
 };
 // CHECK: @_ZN1XIiE1aE = linkonce_odr global i32 10
 // CHECK: @_ZN1XIiE1bE = global i32 20
 // CHECK-NOT: @_ZN1XIiE1cE
+// CHECK: @_ZN1XIiE1dE = linkonce_odr constant i32 40
+// CHECK: @_ZN1XIiE1eE = linkonce_odr global i32 50
 template<> inline int X<int>::a = 10;
 int &use3 = X<int>::a;
 template<> int X<int>::b = 20;
 template<> inline int X<int>::c = 30;
+template<typename T> constexpr int X<T>::d = 40;
+template<typename T> inline int X<T>::e = 50;
+const int *use_x_int_d = &X<int>::d;
+const int *use_x_int_e = &X<int>::e;
 
 template<typename T> struct Y;
 template<> struct Y<int> {
diff --git a/test/Lexer/null-character-in-literal.c b/test/Lexer/null-character-in-literal.c
new file mode 100644
index 0000000000000000000000000000000000000000..a479547536762e96405e1866f3a15b7429ca987e
GIT binary patch
literal 917
zcmcIj%TB{E5bQZ$u_~$p6=~>|m-qsN5Zo#u$Jw-2-F5JzZB+H&S(66!!KnhD?6r4x
zc6OZS@cRCI3j?VIO+Ta@Lsq$lyjW+3bOYHEt*ROrxFpGQc$+B)mC^{(@FPVIwDM?$
z1`1k(oCzx=2i!Fj{76`=0^b=A-hjo0St9ruE+UwaQkBRsS~sI4iMb%)0n%Q22Yip~
z8X2q1R>G2^DQq@}MK0YH)D|7uC6=J*yL?AzyKNDRh&cHxNH|!(kbKI<%rnrO%!&Y=
z6g7gw&wwP6iL7r@X4X<0O6Jy&J@tNs>z~=W^EOJfn?~3Cu<R;K{-MNy7DtEkrQyt%
z;GwW)j>J=yI+Pfv%f11ml8q%ra2)dBLLCj@(DAltW%jW1iyI{uUHW}gc~67K^9**P
e@=@1&$9)0NT4P&hq(?;3_B+KNQkg&{Pwop|mop;(

literal 0
HcmV?d00001

diff --git a/test/Modules/ExtDebugInfo.cpp b/test/Modules/ExtDebugInfo.cpp
index 97386bc4d007..c57f1f034eb2 100644
--- a/test/Modules/ExtDebugInfo.cpp
+++ b/test/Modules/ExtDebugInfo.cpp
@@ -187,7 +187,7 @@ void foo() {
 
 // CHECK: !DIGlobalVariable(name: "anon_enum", {{.*}}, type: ![[ANON_ENUM:[0-9]+]]
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, scope: ![[NS]],
-// CHECK-SAME:             line: 16
+// CHECK-SAME:             line: 19
 
 // CHECK: !DIGlobalVariable(name: "GlobalUnion",
 // CHECK-SAME:              type: ![[GLOBAL_UNION:[0-9]+]]
diff --git a/test/Modules/Inputs/DebugCXX.h b/test/Modules/Inputs/DebugCXX.h
index 1ccf8d302f13..8f83c0bc69db 100644
--- a/test/Modules/Inputs/DebugCXX.h
+++ b/test/Modules/Inputs/DebugCXX.h
@@ -1,4 +1,7 @@
 /* -*- C++ -*- */
+
+#include "dummy.h"
+
 namespace DebugCXX {
   // Records.
   struct Struct {
diff --git a/test/Modules/Inputs/odr_hash-Friend/Box.h b/test/Modules/Inputs/odr_hash-Friend/Box.h
new file mode 100644
index 000000000000..01ab90d601c2
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/Box.h
@@ -0,0 +1,14 @@
+template <class T>
+struct iterator {
+  void Compare(const iterator &x) { }
+  friend void Check(iterator) {}
+};
+
+template <class T = int> struct Box {
+  iterator<T> I;
+
+  void test() {
+    Check(I);
+    I.Compare(I);
+  }
+};
diff --git a/test/Modules/Inputs/odr_hash-Friend/M1.h b/test/Modules/Inputs/odr_hash-Friend/M1.h
new file mode 100644
index 000000000000..202ad06c3488
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M1.h
@@ -0,0 +1,6 @@
+#include "Box.h"
+
+void Peek() {
+  Box<> Gift;
+  Gift.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/M2.h b/test/Modules/Inputs/odr_hash-Friend/M2.h
new file mode 100644
index 000000000000..69f08a957ede
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M2.h
@@ -0,0 +1,5 @@
+#include "Box.h"
+void x() {
+  Box<> Unused;
+  //Unused.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/M3.h b/test/Modules/Inputs/odr_hash-Friend/M3.h
new file mode 100644
index 000000000000..ab457e0c08f2
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/M3.h
@@ -0,0 +1,7 @@
+#include "Box.h"
+#include "M2.h"
+
+void Party() {
+  Box<> Present;
+  Present.test();
+}
diff --git a/test/Modules/Inputs/odr_hash-Friend/module.modulemap b/test/Modules/Inputs/odr_hash-Friend/module.modulemap
new file mode 100644
index 000000000000..28e1832e30e9
--- /dev/null
+++ b/test/Modules/Inputs/odr_hash-Friend/module.modulemap
@@ -0,0 +1,15 @@
+module Box {
+  header "Box.h"
+}
+
+module Module1 {
+  header "M1.h"
+}
+
+module Module2 {
+  header "M2.h"
+}
+
+module Module3 {
+  header "M3.h"
+}
diff --git a/test/Modules/ModuleDebugInfo.cpp b/test/Modules/ModuleDebugInfo.cpp
index 008b3e4f2bab..f0d883767045 100644
--- a/test/Modules/ModuleDebugInfo.cpp
+++ b/test/Modules/ModuleDebugInfo.cpp
@@ -5,12 +5,13 @@
 
 // Modules:
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll
+// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debugger-tuning=lldb -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll
 // RUN: cat %t-mod.ll | FileCheck %s
 // RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-MOD %s
 
 // PCH:
-// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11 -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll
+// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11  -debugger-tuning=lldb -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll
 // RUN: cat %t-pch.ll | FileCheck %s
 // RUN: cat %t-pch.ll | FileCheck --check-prefix=CHECK-NEG %s
 
@@ -18,6 +19,9 @@
 @import DebugCXX;
 #endif
 
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
+
 // CHECK: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus,
 // CHECK-SAME:                    isOptimized: false,
 // CHECK-NOT:                     splitDebugFilename:
@@ -27,6 +31,8 @@
 // CHECK-SAME:             identifier: "_ZTSN8DebugCXX4EnumE")
 // CHECK: !DINamespace(name: "DebugCXX"
 
+// CHECK-MOD: ![[DEBUGCXX:.*]] = !DIModule(scope: null, name: "DebugCXX
+
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type,
 // CHECK-NOT:              name:
 // CHECK-SAME:             )
@@ -150,4 +156,11 @@
 // CHECK-SAME:                             name: "WithSpecializedBase<float>",
 // CHECK-SAME:                             flags: DIFlagFwdDecl,
 
+// CHECK-MOD: !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: ![[DEBUGCXX]],
+// CHECK-MOD-SAME:              entity: ![[DUMMY:[0-9]+]],
+// CHECK-MOD-SAME:              line: 3)
+// CHECK-MOD: ![[DUMMY]] = !DIModule(scope: null, name: "dummy",
+// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_ObjC_plus_plus,
+// CHECK-MOD-SAME:  splitDebugFilename: "{{.*}}dummy{{.*}}.pcm",
+
 // CHECK-NEG-NOT: !DICompositeType(tag: DW_TAG_structure_type, name: "PureForwardDecl"
diff --git a/test/Modules/odr_hash-Friend.cpp b/test/Modules/odr_hash-Friend.cpp
new file mode 100644
index 000000000000..39c0c4b762c2
--- /dev/null
+++ b/test/Modules/odr_hash-Friend.cpp
@@ -0,0 +1,22 @@
+// RUN: rm -rf %t
+
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modules.cache \
+// RUN:  -I %S/Inputs/odr_hash-Friend \
+// RUN:  -emit-obj -o /dev/null \
+// RUN:  -fmodules \
+// RUN:  -fimplicit-module-maps \
+// RUN:  -fmodules-cache-path=%t/modules.cache \
+// RUN:  -std=c++11 -x c++ %s -verify
+
+// PR35939: MicrosoftMangle.cpp triggers an assertion failure on this test.
+// UNSUPPORTED: system-windows
+
+// expected-no-diagnostics
+
+#include "Box.h"
+#include "M1.h"
+#include "M3.h"
+
+void Run() {
+  Box<> Present;
+}
diff --git a/test/Modules/odr_hash-blocks.cpp b/test/Modules/odr_hash-blocks.cpp
new file mode 100644
index 000000000000..07dfa4ce2ac8
--- /dev/null
+++ b/test/Modules/odr_hash-blocks.cpp
@@ -0,0 +1,119 @@
+// Clear and create directories
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: mkdir %t/cache
+// RUN: mkdir %t/Inputs
+
+// Build first header file
+// RUN: echo "#define FIRST" >> %t/Inputs/first.h
+// RUN: cat %s               >> %t/Inputs/first.h
+
+// Build second header file
+// RUN: echo "#define SECOND" >> %t/Inputs/second.h
+// RUN: cat %s                >> %t/Inputs/second.h
+
+// Test that each header can compile
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/first.h
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/second.h
+
+// Build module map file
+// RUN: echo "module FirstModule {"     >> %t/Inputs/module.map
+// RUN: echo "    header \"first.h\""   >> %t/Inputs/module.map
+// RUN: echo "}"                        >> %t/Inputs/module.map
+// RUN: echo "module SecondModule {"    >> %t/Inputs/module.map
+// RUN: echo "    header \"second.h\""  >> %t/Inputs/module.map
+// RUN: echo "}"                        >> %t/Inputs/module.map
+
+// Run test
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps \
+// RUN:            -fmodules-cache-path=%t/cache -x c++ -I%t/Inputs \
+// RUN:            -verify %s -std=c++11 -fblocks
+
+#if !defined(FIRST) && !defined(SECOND)
+#include "first.h"
+#include "second.h"
+#endif
+
+// Used for testing
+#if defined(FIRST)
+#define ACCESS public:
+#elif defined(SECOND)
+#define ACCESS private:
+#endif
+
+// TODO: S1, S2, and S3 should generate errors.
+namespace Blocks {
+#if defined(FIRST)
+struct S1 {
+  void (^block)(int x) = ^(int x) { };
+};
+#elif defined(SECOND)
+struct S1 {
+  void (^block)(int x) = ^(int y) { };
+};
+#else
+S1 s1;
+#endif
+
+#if defined(FIRST)
+struct S2 {
+  int (^block)(int x) = ^(int x) { return x + 1; };
+};
+#elif defined(SECOND)
+struct S2 {
+  int (^block)(int x) = ^(int x) { return x; };
+};
+#else
+S2 s2;
+#endif
+
+#if defined(FIRST)
+struct S3 {
+  void run(int (^block)(int x));
+};
+#elif defined(SECOND)
+struct S3 {
+  void run(int (^block)(int x, int y));
+};
+#else
+S3 s3;
+#endif
+
+#define DECLS                                       \
+  int (^block)(int x) = ^(int x) { return x + x; }; \
+  void run(int (^block)(int x, int y));
+
+#if defined(FIRST) || defined(SECOND)
+struct Valid1 {
+  DECLS
+};
+#else
+Valid1 v1;
+#endif
+
+#if defined(FIRST) || defined(SECOND)
+struct Invalid1 {
+  DECLS
+  ACCESS
+};
+#else
+Invalid1 i1;
+// expected-error@second.h:* {{'Blocks::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}}
+// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}}
+#endif
+
+#undef DECLS
+}
+
+// Keep macros contained to one file.
+#ifdef FIRST
+#undef FIRST
+#endif
+
+#ifdef SECOND
+#undef SECOND
+#endif
+
+#ifdef ACCESS
+#undef ACCESS
+#endif
diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu
index 9e96f6a15e6e..4ad3e4d97aa2 100644
--- a/test/Preprocessor/cuda-types.cu
+++ b/test/Preprocessor/cuda-types.cu
@@ -9,40 +9,40 @@
 
 // RUN: %clang --cuda-host-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-host-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-host-defines-filtered
 // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-device-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-device-defines-filtered
 // RUN: diff %t/i386-host-defines-filtered %t/i386-device-defines-filtered
 
 // RUN: %clang --cuda-host-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-host-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-host-defines-filtered
 // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-device-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-device-defines-filtered
 // RUN: diff %t/x86_64-host-defines-filtered %t/x86_64-device-defines-filtered
 
 // RUN: %clang --cuda-host-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/powerpc64-host-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/powerpc64-host-defines-filtered
 // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/powerpc64-device-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/powerpc64-device-defines-filtered
 // RUN: diff %t/powerpc64-host-defines-filtered %t/powerpc64-device-defines-filtered
 
 // RUN: %clang --cuda-host-only -nocudainc -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-host-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-host-defines-filtered
 // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-device-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-device-defines-filtered
 // RUN: diff %t/i386-msvc-host-defines-filtered %t/i386-msvc-device-defines-filtered
 
 // RUN: %clang --cuda-host-only -nocudainc -target x86_64-windows-msvc -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-host-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-host-defines-filtered
 // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target x86_64-windows-msvc -x cuda -E -dM -o - /dev/null \
 // RUN:   | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \
-// RUN:   | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-device-defines-filtered
+// RUN:   | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-device-defines-filtered
 // RUN: diff %t/x86_64-msvc-host-defines-filtered %t/x86_64-msvc-device-defines-filtered
diff --git a/test/Sema/_Float128.c b/test/Sema/_Float128.c
deleted file mode 100644
index f0c3c6d555ef..000000000000
--- a/test/Sema/_Float128.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 -verify %s
-// RUN: %clang_cc1 -triple powerpc64-linux -verify %s
-// RUN: %clang_cc1 -triple i686-windows-gnu -verify %s
-// RUN: %clang_cc1 -triple x86_64-windows-gnu -verify %s
-// RUN: %clang_cc1 -triple x86_64-windows-msvc -verify %s
-
-#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
-_Float128 f;
-_Float128 tiny = __FLT128_EPSILON__;
-int g(int x, _Float128 *y) {
-  return x + *y;
-}
-
-// expected-no-diagnostics
-#else
-_Float128 f;  // expected-error {{__float128 is not supported on this target}}
-float tiny = __FLT128_EPSILON__; // expected-error{{use of undeclared identifier}}
-int g(int x, _Float128 *y) {  // expected-error {{__float128 is not supported on this target}}
-  return x + *y;
-}
-
-#endif  // defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
diff --git a/test/Sema/tautological-constant-compare.c b/test/Sema/tautological-constant-compare.c
index 65aa7c9abdea..b242f35dc6cf 100644
--- a/test/Sema/tautological-constant-compare.c
+++ b/test/Sema/tautological-constant-compare.c
@@ -2,8 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-constant-in-range-compare -DTEST -verify -x c++ %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify -x c++ %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -DTEST -verify %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -DTEST -verify -x c++ %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify -x c++ %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify -x c++ %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s
diff --git a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
index 9080f67fe0e1..d21fbf289289 100644
--- a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
+++ b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp
@@ -309,6 +309,17 @@ namespace dependent {
   template int New(int);
 }
 
+namespace injected_class_name {
+  template<typename T = void> struct A {
+    A();
+    template<typename U> A(A<U>);
+  };
+  A<int> a;
+  A b = a;
+  using T = decltype(a);
+  using T = decltype(b);
+}
+
 #else
 
 // expected-no-diagnostics
diff --git a/test/SemaTemplate/alignas.cpp b/test/SemaTemplate/alignas.cpp
index 8a1f96e5bdec..680f07b32998 100644
--- a/test/SemaTemplate/alignas.cpp
+++ b/test/SemaTemplate/alignas.cpp
@@ -21,3 +21,14 @@ struct C { char a[16]; };
 
 static_assert(sizeof(my_union<A, B, C>) == 16, "");
 static_assert(alignof(my_union<A, B, C>) == 8, "");
+
+namespace PR35028 {
+  template<class X, int Alignment> struct alignas(X) alignas(long long) alignas(long double) alignas(Alignment) Aligned {
+    union {
+      long long align1;
+      long double align2;
+      char data[sizeof(X)];
+    };
+  };
+  Aligned<int, 1> a;
+}
diff --git a/test/SemaTemplate/cxx17-inline-variables.cpp b/test/SemaTemplate/cxx17-inline-variables.cpp
index 9e6761ee57aa..7fc0aa8eeeb0 100644
--- a/test/SemaTemplate/cxx17-inline-variables.cpp
+++ b/test/SemaTemplate/cxx17-inline-variables.cpp
@@ -16,3 +16,14 @@ namespace CompleteType {
 
   constexpr int n = X<true>::value;
 }
+
+template <typename T> struct A {
+  static const int n;
+  static const int m;
+  constexpr int f() { return n; }
+  constexpr int g() { return n; }
+};
+template <typename T> constexpr int A<T>::n = sizeof(A) + sizeof(T);
+template <typename T> inline constexpr int A<T>::m = sizeof(A) + sizeof(T);
+static_assert(A<int>().f() == 5);
+static_assert(A<int>().g() == 5);
diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp
index d699a44b13fd..317e2c836335 100644
--- a/unittests/Lex/LexerTest.cpp
+++ b/unittests/Lex/LexerTest.cpp
@@ -476,6 +476,8 @@ TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
 TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
   std::vector<Token> LexedTokens = Lex("  //  \\\n");
   EXPECT_TRUE(LexedTokens.empty());
+  EXPECT_TRUE(Lex("#include <\\\\").empty());
+  EXPECT_TRUE(Lex("#include <\\\\\n").empty());
 }
 
 TEST_F(LexerTest, StringizingRasString) {

From b99ba46cc70e9300b052ce95aa36b4d05a482f7f Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:25:48 +0000
Subject: [PATCH 3/6] Vendor import of compiler-rt release_60 branch r323338:
 https://llvm.org/svn/llvm-project/compiler-rt/branches/release_60@323338

---
 lib/builtins/clear_cache.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index 4a01cb46d4ac..451f1c0b1245 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -33,6 +33,11 @@ uintptr_t GetCurrentProcess(void);
   #include <machine/sysarch.h>
 #endif
 
+#if defined(__OpenBSD__) && defined(__mips__)
+  #include <sys/types.h>
+  #include <machine/sysarch.h>
+#endif
+
 #if defined(__linux__) && defined(__mips__)
   #include <sys/cachectl.h>
   #include <sys/syscall.h>
@@ -142,6 +147,8 @@ void __clear_cache(void *start, void *end) {
     #else
         syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
     #endif
+#elif defined(__mips__) && defined(__OpenBSD__)
+  cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
 #elif defined(__aarch64__) && !defined(__APPLE__)
   uint64_t xstart = (uint64_t)(uintptr_t) start;
   uint64_t xend = (uint64_t)(uintptr_t) end;
@@ -156,12 +163,14 @@ void __clear_cache(void *start, void *end) {
    * uintptr_t in case this runs in an IPL32 environment.
    */
   const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
-  for (addr = xstart; addr < xend; addr += dcache_line_size)
+  for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
+       addr += dcache_line_size)
     __asm __volatile("dc cvau, %0" :: "r"(addr));
   __asm __volatile("dsb ish");
 
   const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
-  for (addr = xstart; addr < xend; addr += icache_line_size)
+  for (addr = xstart & ~(icache_line_size - 1); addr < xend;
+       addr += icache_line_size)
     __asm __volatile("ic ivau, %0" :: "r"(addr));
   __asm __volatile("isb sy");
 #elif defined (__powerpc64__)

From 40e3ad2a19d821f293dbfc500beb6b9f7af41db2 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:25:56 +0000
Subject: [PATCH 4/6] Vendor import of libc++ release_60 branch r323338:
 https://llvm.org/svn/llvm-project/libcxx/branches/release_60@323338

---
 include/type_traits                           | 62 +++++++++++++++++--
 .../meta.unary.prop/is_constructible.pass.cpp | 12 ++++
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/include/type_traits b/include/type_traits
index 7a6c992930e4..eb443ee0abd6 100644
--- a/include/type_traits
+++ b/include/type_traits
@@ -3172,6 +3172,14 @@ template <class _A0, class _A1>
 false_type
 __is_constructible2_test(__any, _A0&, _A1&);
 
+template <class _Tp, class _A0, class _A1, class _A2>
+decltype((_Tp(_VSTD::declval<_A0>(), _VSTD::declval<_A1>(), _VSTD::declval<_A2>()), true_type()))
+__is_constructible3_test(_Tp&, _A0&, _A1&, _A2&);
+
+template <class _A0, class _A1, class _A2>
+false_type
+__is_constructible3_test(__any, _A0&, _A1&, _A2&);
+
 template <bool, class _Tp>
 struct __is_constructible0_imp // false, _Tp is not a scalar
     : public common_type
@@ -3196,6 +3204,14 @@ struct __is_constructible2_imp // false, _Tp is not a scalar
              >::type
     {};
 
+template <bool, class _Tp, class _A0, class _A1, class _A2>
+struct __is_constructible3_imp // false, _Tp is not a scalar
+    : public common_type
+             <
+                 decltype(__is_constructible3_test(declval<_Tp&>(), declval<_A0>(), declval<_A1>(), declval<_A2>()))
+             >::type
+    {};
+
 //      handle scalars and reference types
 
 //      Scalars are default constructible, references are not
@@ -3215,6 +3231,11 @@ struct __is_constructible2_imp<true, _Tp, _A0, _A1>
     : public false_type
     {};
 
+template <class _Tp, class _A0, class _A1, class _A2>
+struct __is_constructible3_imp<true, _Tp, _A0, _A1, _A2>
+    : public false_type
+    {};
+
 //      Treat scalars and reference types separately
 
 template <bool, class _Tp>
@@ -3235,6 +3256,12 @@ struct __is_constructible2_void_check
                                 _Tp, _A0, _A1>
     {};
 
+template <bool, class _Tp, class _A0, class _A1, class _A2>
+struct __is_constructible3_void_check
+    : public __is_constructible3_imp<is_scalar<_Tp>::value || is_reference<_Tp>::value,
+                                _Tp, _A0, _A1, _A2>
+    {};
+
 //      If any of T or Args is void, is_constructible should be false
 
 template <class _Tp>
@@ -3252,17 +3279,24 @@ struct __is_constructible2_void_check<true, _Tp, _A0, _A1>
     : public false_type
     {};
 
+template <class _Tp, class _A0, class _A1, class _A2>
+struct __is_constructible3_void_check<true, _Tp, _A0, _A1, _A2>
+    : public false_type
+    {};
+
 //      is_constructible entry point
 
 template <class _Tp, class _A0 = __is_construct::__nat,
-                     class _A1 = __is_construct::__nat>
+                     class _A1 = __is_construct::__nat,
+                     class _A2 = __is_construct::__nat>
 struct _LIBCPP_TEMPLATE_VIS is_constructible
-    : public __is_constructible2_void_check<is_void<_Tp>::value
+    : public __is_constructible3_void_check<is_void<_Tp>::value
                                         || is_abstract<_Tp>::value
                                         || is_function<_Tp>::value
                                         || is_void<_A0>::value
-                                        || is_void<_A1>::value,
-                                           _Tp, _A0, _A1>
+                                        || is_void<_A1>::value
+                                        || is_void<_A2>::value,
+                                           _Tp, _A0, _A1, _A2>
     {};
 
 template <class _Tp>
@@ -3282,6 +3316,16 @@ struct _LIBCPP_TEMPLATE_VIS is_constructible<_Tp, _A0, __is_construct::__nat>
                                            _Tp, _A0>
     {};
 
+template <class _Tp, class _A0, class _A1>
+struct _LIBCPP_TEMPLATE_VIS is_constructible<_Tp, _A0, _A1, __is_construct::__nat>
+    : public __is_constructible2_void_check<is_void<_Tp>::value
+                                        || is_abstract<_Tp>::value
+                                        || is_function<_Tp>::value
+                                        || is_void<_A0>::value
+                                        || is_void<_A1>::value,
+                                           _Tp, _A0, _A1>
+    {};
+
 //      Array types are default constructible if their element type
 //      is default constructible
 
@@ -3300,6 +3344,11 @@ struct __is_constructible2_imp<false, _Ap[_Np], _A0, _A1>
     : public false_type
     {};
 
+template <class _Ap, size_t _Np, class _A0, class _A1, class _A2>
+struct __is_constructible3_imp<false, _Ap[_Np], _A0, _A1, _A2>
+    : public false_type
+    {};
+
 //      Incomplete array types are not constructible
 
 template <class _Ap>
@@ -3317,6 +3366,11 @@ struct __is_constructible2_imp<false, _Ap[], _A0, _A1>
     : public false_type
     {};
 
+template <class _Ap, class _A0, class _A1, class _A2>
+struct __is_constructible3_imp<false, _Ap[], _A0, _A1, _A2>
+    : public false_type
+    {};
+
 #endif // __has_feature(is_constructible)
 
 
diff --git a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp
index 1f7c32a8cc07..b90363a5c380 100644
--- a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp
+++ b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp
@@ -30,6 +30,7 @@ struct A
 {
     explicit A(int);
     A(int, double);
+    A(int, long, double);
 #if TEST_STD_VER >= 11
 private:
 #endif
@@ -106,6 +107,16 @@ void test_is_constructible()
 #endif
 }
 
+template <class T, class A0, class A1, class A2>
+void test_is_constructible()
+{
+    static_assert(( std::is_constructible<T, A0, A1, A2>::value), "");
+    LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible<T, A0, A1, A2>::type::value), "");
+#if TEST_STD_VER > 14
+    static_assert(( std::is_constructible_v<T, A0, A1, A2>), "");
+#endif
+}
+
 template <class T>
 void test_is_not_constructible()
 {
@@ -146,6 +157,7 @@ int main()
     test_is_constructible<int, const int> ();
     test_is_constructible<A, int> ();
     test_is_constructible<A, int, double> ();
+    test_is_constructible<A, int, long, double> ();
     test_is_constructible<int&, int&> ();
 
     test_is_not_constructible<A> ();

From a506d0d6a9a6c2745e058e35ad4c62d1ddc5f20e Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:26:03 +0000
Subject: [PATCH 5/6] Vendor import of lld release_60 branch r323338:
 https://llvm.org/svn/llvm-project/lld/branches/release_60@323338

---
 COFF/Driver.cpp                               |  1 +
 ELF/LinkerScript.cpp                          | 28 ++++---
 ELF/OutputSections.cpp                        |  9 ---
 ELF/OutputSections.h                          |  1 +
 ELF/ScriptParser.cpp                          | 11 +++
 ELF/SymbolTable.cpp                           |  3 +-
 ELF/SyntheticSections.cpp                     |  7 +-
 ELF/Writer.cpp                                |  7 +-
 test/ELF/Inputs/as-needed-lazy.s              |  3 +
 test/ELF/Inputs/compress-debug.s              |  5 ++
 test/ELF/as-needed-lazy.s                     | 14 ++++
 test/ELF/compress-debug-sections-reloc.s      | 26 ++++++
 test/ELF/linkerscript/at-self-reference.s     | 63 +++++++++++++++
 test/ELF/linkerscript/at2.s                   | 81 +++++++++++++++++++
 .../compress-debug-sections-custom.s          | 35 ++++++++
 test/ELF/linkerscript/parse-section-in-addr.s | 10 +++
 test/ELF/sysv-hash-no-rosegment.s             | 13 +++
 17 files changed, 289 insertions(+), 28 deletions(-)
 create mode 100644 test/ELF/Inputs/as-needed-lazy.s
 create mode 100644 test/ELF/Inputs/compress-debug.s
 create mode 100644 test/ELF/as-needed-lazy.s
 create mode 100644 test/ELF/compress-debug-sections-reloc.s
 create mode 100644 test/ELF/linkerscript/at-self-reference.s
 create mode 100644 test/ELF/linkerscript/at2.s
 create mode 100644 test/ELF/linkerscript/compress-debug-sections-custom.s
 create mode 100644 test/ELF/linkerscript/parse-section-in-addr.s
 create mode 100644 test/ELF/sysv-hash-no-rosegment.s

diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp
index 1aaec355c7a5..0f3d8fb0b4ef 100644
--- a/COFF/Driver.cpp
+++ b/COFF/Driver.cpp
@@ -57,6 +57,7 @@ bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) {
   errorHandler().ErrorLimitExceededMsg =
       "too many errors emitted, stopping now"
       " (use /ERRORLIMIT:0 to see all errors)";
+  errorHandler().ExitEarly = CanExitEarly;
   Config = make<Configuration>();
   Config->Argv = {Args.begin(), Args.end()};
   Config->CanExitEarly = CanExitEarly;
diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp
index 8f50a977fd75..33a618952456 100644
--- a/ELF/LinkerScript.cpp
+++ b/ELF/LinkerScript.cpp
@@ -608,13 +608,6 @@ void LinkerScript::switchTo(OutputSection *Sec) {
 
   Ctx->OutSec = Sec;
   Ctx->OutSec->Addr = advance(0, Ctx->OutSec->Alignment);
-
-  // If neither AT nor AT> is specified for an allocatable section, the linker
-  // will set the LMA such that the difference between VMA and LMA for the
-  // section is the same as the preceding output section in the same region
-  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
-  if (Ctx->LMAOffset)
-    Ctx->OutSec->LMAOffset = Ctx->LMAOffset();
 }
 
 // This function searches for a memory region to place the given output
@@ -662,17 +655,28 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
   if (Ctx->MemRegion)
     Dot = Ctx->MemRegionOffset[Ctx->MemRegion];
 
+  switchTo(Sec);
+
   if (Sec->LMAExpr) {
     uint64_t D = Dot;
     Ctx->LMAOffset = [=] { return Sec->LMAExpr().getValue() - D; };
   }
 
-  switchTo(Sec);
+  if (!Sec->LMARegionName.empty()) {
+    if (MemoryRegion *MR = MemoryRegions.lookup(Sec->LMARegionName)) {
+      uint64_t Offset = MR->Origin - Dot;
+      Ctx->LMAOffset = [=] { return Offset; };
+    } else {
+      error("memory region '" + Sec->LMARegionName + "' not declared");
+    }
+  }
 
-  // We do not support custom layout for compressed debug sectons.
-  // At this point we already know their size and have compressed content.
-  if (Ctx->OutSec->Flags & SHF_COMPRESSED)
-    return;
+  // If neither AT nor AT> is specified for an allocatable section, the linker
+  // will set the LMA such that the difference between VMA and LMA for the
+  // section is the same as the preceding output section in the same region
+  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+  if (Ctx->LMAOffset)
+    Ctx->OutSec->LMAOffset = Ctx->LMAOffset();
 
   // The Size previously denoted how many InputSections had been added to this
   // section, and was used for sorting SHF_LINK_ORDER sections. Reset it to
diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp
index f0677f7e1ca5..94c98284196f 100644
--- a/ELF/OutputSections.cpp
+++ b/ELF/OutputSections.cpp
@@ -183,15 +183,6 @@ template <class ELFT> void OutputSection::maybeCompress() {
       !Name.startswith(".debug_"))
     return;
 
-  // Calculate the section offsets and size pre-compression.
-  Size = 0;
-  for (BaseCommand *Cmd : SectionCommands)
-    if (auto *ISD = dyn_cast<InputSectionDescription>(Cmd))
-      for (InputSection *IS : ISD->Sections) {
-        IS->OutSecOff = alignTo(Size, IS->Alignment);
-        this->Size = IS->OutSecOff + IS->getSize();
-      }
-
   // Create a section header.
   ZDebugHeader.resize(sizeof(Elf_Chdr));
   auto *Hdr = reinterpret_cast<Elf_Chdr *>(ZDebugHeader.data());
diff --git a/ELF/OutputSections.h b/ELF/OutputSections.h
index b2845773e9af..009f45c03333 100644
--- a/ELF/OutputSections.h
+++ b/ELF/OutputSections.h
@@ -99,6 +99,7 @@ class OutputSection final : public BaseCommand, public SectionBase {
   ConstraintKind Constraint = ConstraintKind::NoConstraint;
   std::string Location;
   std::string MemoryRegionName;
+  std::string LMARegionName;
   bool Noload = false;
 
   template <class ELFT> void finalize();
diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp
index 4263944981f2..e068beeee262 100644
--- a/ELF/ScriptParser.cpp
+++ b/ELF/ScriptParser.cpp
@@ -709,6 +709,14 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
   if (consume(">"))
     Cmd->MemoryRegionName = next();
 
+  if (consume("AT")) {
+    expect(">");
+    Cmd->LMARegionName = next();
+  }
+
+  if (Cmd->LMAExpr && !Cmd->LMARegionName.empty())
+    error("section can't have both LMA and a load region");
+
   Cmd->Phdrs = readOutputSectionPhdrs();
 
   if (consume("="))
@@ -922,7 +930,10 @@ ByteCommand *ScriptParser::readByteCommand(StringRef Tok) {
 
 StringRef ScriptParser::readParenLiteral() {
   expect("(");
+  bool Orig = InExpr;
+  InExpr = false;
   StringRef Tok = next();
+  InExpr = Orig;
   expect(")");
   return Tok;
 }
diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp
index b6bf21998863..c3a00bea4aaa 100644
--- a/ELF/SymbolTable.cpp
+++ b/ELF/SymbolTable.cpp
@@ -491,12 +491,13 @@ void SymbolTable::addShared(StringRef Name, SharedFile<ELFT> &File,
   if (WasInserted || ((S->isUndefined() || S->isLazy()) &&
                       S->getVisibility() == STV_DEFAULT)) {
     uint8_t Binding = S->Binding;
+    bool WasUndefined = S->isUndefined();
     replaceSymbol<SharedSymbol>(S, File, Name, Sym.getBinding(), Sym.st_other,
                                 Sym.getType(), Sym.st_value, Sym.st_size,
                                 Alignment, VerdefIndex);
     if (!WasInserted) {
       S->Binding = Binding;
-      if (!S->isWeak() && !Config->GcSections)
+      if (!S->isWeak() && !Config->GcSections && WasUndefined)
         File.IsNeeded = true;
     }
   }
diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp
index f878acf8cf3f..a5a851f95400 100644
--- a/ELF/SyntheticSections.cpp
+++ b/ELF/SyntheticSections.cpp
@@ -1823,6 +1823,9 @@ void HashTableSection::finalizeContents() {
 }
 
 void HashTableSection::writeTo(uint8_t *Buf) {
+  // See comment in GnuHashTableSection::writeTo.
+  memset(Buf, 0, Size);
+
   unsigned NumSymbols = InX::DynSymTab->getNumSymbols();
 
   uint32_t *P = reinterpret_cast<uint32_t *>(Buf);
@@ -2435,10 +2438,8 @@ void MergeNoTailSection::finalizeContents() {
   parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
     for (MergeInputSection *Sec : Sections) {
       for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
-        if (!Sec->Pieces[I].Live)
-          continue;
         size_t ShardId = getShardId(Sec->Pieces[I].Hash);
-        if ((ShardId & (Concurrency - 1)) == ThreadId)
+        if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live)
           Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I));
       }
     }
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 24c3e1ee207c..5feff456ffa9 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -427,13 +427,14 @@ template <class ELFT> void Writer<ELFT>::run() {
   if (errorCount())
     return;
 
+  Script->assignAddresses();
+
   // If -compressed-debug-sections is specified, we need to compress
   // .debug_* sections. Do it right now because it changes the size of
   // output sections.
-  parallelForEach(OutputSections,
-                  [](OutputSection *Sec) { Sec->maybeCompress<ELFT>(); });
+  for (OutputSection *Sec : OutputSections)
+    Sec->maybeCompress<ELFT>();
 
-  Script->assignAddresses();
   Script->allocateHeaders(Phdrs);
 
   // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a
diff --git a/test/ELF/Inputs/as-needed-lazy.s b/test/ELF/Inputs/as-needed-lazy.s
new file mode 100644
index 000000000000..7f9c360dda20
--- /dev/null
+++ b/test/ELF/Inputs/as-needed-lazy.s
@@ -0,0 +1,3 @@
+.global foo
+foo:
+  nop
diff --git a/test/ELF/Inputs/compress-debug.s b/test/ELF/Inputs/compress-debug.s
new file mode 100644
index 000000000000..5fd9d39a98a0
--- /dev/null
+++ b/test/ELF/Inputs/compress-debug.s
@@ -0,0 +1,5 @@
+.text
+.fill 0x44
+
+.section .debug_info,"",@progbits
+.fill 0x43
diff --git a/test/ELF/as-needed-lazy.s b/test/ELF/as-needed-lazy.s
new file mode 100644
index 000000000000..e892b9980aad
--- /dev/null
+++ b/test/ELF/as-needed-lazy.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/as-needed-lazy.s -o %t2.o
+# RUN: ld.lld %t2.o -o %t2.so -shared
+# RUN: rm -f %t2.a
+# RUN: llvm-ar rc %t2.a %t2.o
+# RUN: ld.lld %t1.o %t2.a --as-needed %t2.so -o %t
+# RUN: llvm-readobj -d %t | FileCheck %s
+
+# CHECK-NOT: NEEDED
+
+.global _start
+_start:
+  nop
diff --git a/test/ELF/compress-debug-sections-reloc.s b/test/ELF/compress-debug-sections-reloc.s
new file mode 100644
index 000000000000..b4ee4ea6dd97
--- /dev/null
+++ b/test/ELF/compress-debug-sections-reloc.s
@@ -0,0 +1,26 @@
+# REQUIRES: x86, zlib
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/compress-debug.s -o %t2.o
+# RUN: ld.lld %t2.o %t.o -o %t1 --compress-debug-sections=zlib -Ttext=0
+# RUN: llvm-dwarfdump %t1 -debug-str | FileCheck %s
+# These two checks correspond to the patched values of a_sym and a_debug_sym.
+# D = 0x44 - address of .text input section for this file (the start address of
+#     .text is 0 as requested on the command line, and the size of the
+#	  preceding .text in the other input file is 0x44).
+# C = 0x43 - offset of .debug_info section for this file (the size of
+#     the preceding .debug_info from the other input file is 0x43).
+# CHECK: 0x00000000: "D"
+# CHECK: 0x00000004: "C"
+
+.text
+a_sym:
+nop
+
+.section .debug_str,"",@progbits
+.long a_sym
+.long a_debug_sym
+
+.section .debug_info,"",@progbits
+a_debug_sym:
+.long 0x88776655
diff --git a/test/ELF/linkerscript/at-self-reference.s b/test/ELF/linkerscript/at-self-reference.s
new file mode 100644
index 000000000000..7208a4b9fcd4
--- /dev/null
+++ b/test/ELF/linkerscript/at-self-reference.s
@@ -0,0 +1,63 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: echo "SECTIONS { \
+# RUN:  . = 0x1000; \
+# RUN:  .aaa : AT(ADDR(.aaa)) { *(.aaa) } \
+# RUN:  .bbb : AT(ADDR(.bbb)) { *(.bbb) } \
+# RUN: }" > %t.script
+# RUN: ld.lld %t --script %t.script -o %t2
+# RUN: llvm-readobj -program-headers %t2 | FileCheck %s
+
+# CHECK:      ProgramHeaders [
+# CHECK-NEXT:  ProgramHeader {
+# CHECK-NEXT:    Type: PT_LOAD (0x1)
+# CHECK-NEXT:    Offset: 0x1000
+# CHECK-NEXT:    VirtualAddress: 0x1000
+# CHECK-NEXT:    PhysicalAddress: 0x1000
+# CHECK-NEXT:    FileSize: 3
+# CHECK-NEXT:    MemSize: 3
+# CHECK-NEXT:    Flags [ (0x5)
+# CHECK-NEXT:      PF_R (0x4)
+# CHECK-NEXT:      PF_X (0x1)
+# CHECK-NEXT:    ]
+# CHECK-NEXT:    Alignment: 4096
+# CHECK-NEXT:  }
+# CHECK-NEXT:  ProgramHeader {
+# CHECK-NEXT:    Type: PT_LOAD (0x1)
+# CHECK-NEXT:    Offset: 0x1008
+# CHECK-NEXT:    VirtualAddress: 0x1008
+# CHECK-NEXT:    PhysicalAddress: 0x1008
+# CHECK-NEXT:    FileSize: 9
+# CHECK-NEXT:    MemSize: 9
+# CHECK-NEXT:    Flags [ (0x5)
+# CHECK-NEXT:      PF_R (0x4)
+# CHECK-NEXT:      PF_X (0x1)
+# CHECK-NEXT:    ]
+# CHECK-NEXT:    Alignment: 4096
+# CHECK-NEXT:  }
+# CHECK-NEXT:  ProgramHeader {
+# CHECK-NEXT:    Type: PT_GNU_STACK (0x6474E551)
+# CHECK-NEXT:    Offset: 0x0
+# CHECK-NEXT:    VirtualAddress: 0x0
+# CHECK-NEXT:    PhysicalAddress: 0x0
+# CHECK-NEXT:    FileSize: 0
+# CHECK-NEXT:    MemSize: 0
+# CHECK-NEXT:    Flags [ (0x6)
+# CHECK-NEXT:      PF_R (0x4)
+# CHECK-NEXT:      PF_W (0x2)
+# CHECK-NEXT:    ]
+# CHECK-NEXT:    Alignment: 0
+# CHECK-NEXT:  }
+# CHECK-NEXT:]
+
+.global _start
+_start:
+ nop
+
+
+.section .aaa, "a"
+.asciz "aa"
+
+.section .bbb, "a"
+.align 8
+.quad 0
diff --git a/test/ELF/linkerscript/at2.s b/test/ELF/linkerscript/at2.s
new file mode 100644
index 000000000000..1545b1d826a3
--- /dev/null
+++ b/test/ELF/linkerscript/at2.s
@@ -0,0 +1,81 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: echo "MEMORY {                                   \
+# RUN:   AX (ax)   : ORIGIN = 0x2000, LENGTH = 0x100   \
+# RUN:   AW (aw)   : ORIGIN = 0x3000, LENGTH = 0x100   \
+# RUN:   FLASH (ax) : ORIGIN = 0x6000, LENGTH = 0x100   \
+# RUN:   RAM (aw)   : ORIGIN = 0x7000, LENGTH = 0x100 } \
+# RUN: SECTIONS {                                       \
+# RUN:  .foo1 : { *(.foo1) } > AX AT>FLASH             \
+# RUN:  .foo2 : { *(.foo2) } > AX                      \
+# RUN:  .bar1 : { *(.bar1) } > AW AT> RAM              \
+# RUN:  .bar2 : { *(.bar2) } > AW AT > RAM             \
+# RUN:  .bar3 : { *(.bar3) } > AW AT >RAM              \
+# RUN: }" > %t.script
+# RUN: ld.lld %t --script %t.script -o %t2
+# RUN: llvm-readobj -program-headers %t2 | FileCheck %s
+# RUN: llvm-objdump -section-headers %t2 | FileCheck %s --check-prefix=SECTIONS
+
+# CHECK:      ProgramHeaders [
+# CHECK-NEXT:   ProgramHeader {
+# CHECK-NEXT:     Type: PT_LOAD
+# CHECK-NEXT:     Offset: 0x1000
+# CHECK-NEXT:     VirtualAddress: 0x2000
+# CHECK-NEXT:     PhysicalAddress: 0x6000
+# CHECK-NEXT:     FileSize: 16
+# CHECK-NEXT:     MemSize: 16
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:       PF_R
+# CHECK-NEXT:       PF_X
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Alignment:
+# CHECK-NEXT:   }
+# CHECK-NEXT:   ProgramHeader {
+# CHECK-NEXT:     Type: PT_LOAD
+# CHECK-NEXT:     Offset: 0x2000
+# CHECK-NEXT:     VirtualAddress: 0x3000
+# CHECK-NEXT:     PhysicalAddress: 0x7000
+# CHECK-NEXT:     FileSize: 24
+# CHECK-NEXT:     MemSize: 24
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:       PF_R
+# CHECK-NEXT:       PF_W
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Alignment: 4096
+# CHECK-NEXT:   }
+
+# SECTIONS:      Sections:
+# SECTIONS-NEXT: Idx Name          Size      Address
+# SECTIONS-NEXT:   0               00000000 0000000000000000
+# SECTIONS-NEXT:   1 .foo1         00000008 0000000000002000
+# SECTIONS-NEXT:   2 .foo2         00000008 0000000000002008
+# SECTIONS-NEXT:   3 .text         00000000 0000000000002010
+# SECTIONS-NEXT:   4 .bar1         00000008 0000000000003000
+# SECTIONS-NEXT:   5 .bar2         00000008 0000000000003008
+# SECTIONS-NEXT:   6 .bar3         00000008 0000000000003010
+  
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: echo "MEMORY {                                            \
+# RUN:   FLASH (ax) : ORIGIN = 0x2000, LENGTH = 0x100            \
+# RUN:   RAM (aw)   : ORIGIN = 0x5000, LENGTH = 0x100 }          \
+# RUN: SECTIONS {                                                \
+# RUN:  .foo1 : AT(0x500) { *(.foo1) } > FLASH AT>FLASH          \
+# RUN: }" > %t2.script
+# RUN: not ld.lld %t --script %t2.script -o %t2 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR
+# ERR: error: section can't have both LMA and a load region
+
+.section .foo1, "ax"
+.quad 0
+
+.section .foo2, "ax"
+.quad 0
+
+.section .bar1, "aw"
+.quad 0
+
+.section .bar2, "aw"
+.quad 0
+
+.section .bar3, "aw"
+.quad 0
diff --git a/test/ELF/linkerscript/compress-debug-sections-custom.s b/test/ELF/linkerscript/compress-debug-sections-custom.s
new file mode 100644
index 000000000000..31fdd56381b0
--- /dev/null
+++ b/test/ELF/linkerscript/compress-debug-sections-custom.s
@@ -0,0 +1,35 @@
+# REQUIRES: x86, zlib
+
+# RUN: echo "SECTIONS { \
+# RUN:          .text : { . += 0x10; *(.text) } \
+# RUN:          .debug_str : { . += 0x10; *(.debug_str) } \
+# RUN:          .debug_info : { . += 0x10; *(.debug_info) } \
+# RUN:          }" > %t.script
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/../Inputs/compress-debug.s -o %t2.o
+# RUN: ld.lld %t2.o %t.o -o %t1 --compress-debug-sections=zlib -T %t.script
+# RUN: llvm-dwarfdump %t1 -debug-str | FileCheck %s
+# These two checks correspond to the patched values of a_sym and a_debug_sym.
+# T = 0x54 - address of .text input section for this file (the start address of
+#     .text is 0 by default, the size of the preceding .text in the other input
+#	  file is 0x44, and the linker script adds an additional 0x10).
+# S = 0x53 - offset of .debug_info section for this file (the size of
+#     the preceding .debug_info from the other input file is 0x43, and the
+#	  linker script adds an additional 0x10).
+# Also note that the .debug_str offsets are also offset by 0x10, as directed by
+# the linker script.
+# CHECK: 0x00000010: "T"
+# CHECK: 0x00000014: "S"
+
+.text
+a_sym:
+nop
+
+.section .debug_str,"",@progbits
+.long a_sym
+.long a_debug_sym
+
+.section .debug_info,"",@progbits
+a_debug_sym:
+.long 0x88776655
diff --git a/test/ELF/linkerscript/parse-section-in-addr.s b/test/ELF/linkerscript/parse-section-in-addr.s
new file mode 100644
index 000000000000..7a79f646310d
--- /dev/null
+++ b/test/ELF/linkerscript/parse-section-in-addr.s
@@ -0,0 +1,10 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+
+# RUN: echo "SECTIONS {                                   \
+# RUN:         .foo-bar : AT(ADDR(.foo-bar)) { *(.text) } \
+# RUN:       }" > %t.script
+# RUN: ld.lld -o %t.so --script %t.script %t.o -shared
+# RUN: llvm-readelf -S %t.so | FileCheck %s
+
+# CHECK: .foo-bar
diff --git a/test/ELF/sysv-hash-no-rosegment.s b/test/ELF/sysv-hash-no-rosegment.s
new file mode 100644
index 000000000000..31b9d2fbec05
--- /dev/null
+++ b/test/ELF/sysv-hash-no-rosegment.s
@@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: ld.lld -shared --no-rosegment -o %t %t.o
+# RUN: llvm-readobj -hash-table %t | FileCheck %s
+
+# CHECK:      HashTable {
+# CHECK-NEXT:   Num Buckets: 2
+# CHECK-NEXT:   Num Chains: 2
+# CHECK-NEXT:   Buckets: [1, 0]
+# CHECK-NEXT:   Chains: [0, 0]
+# CHECK-NEXT: }
+
+callq undef@PLT

From a2cf70158c66891c9c041270b450e9699b0439fb Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 24 Jan 2018 20:26:12 +0000
Subject: [PATCH 6/6] Vendor import of lldb release_60 branch r323338:
 https://llvm.org/svn/llvm-project/lldb/branches/release_60@323338

---
 lit/CMakeLists.txt  | 4 ++++
 lit/lit.cfg         | 4 ++--
 lit/lit.site.cfg.in | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lit/CMakeLists.txt b/lit/CMakeLists.txt
index 5488154318a9..03fe3d881e9c 100644
--- a/lit/CMakeLists.txt
+++ b/lit/CMakeLists.txt
@@ -11,6 +11,10 @@ else()
   set(ENABLE_SHARED 0)
 endif(BUILD_SHARED_LIBS)
 
+# the value is not canonicalized within LLVM
+llvm_canonicalize_cmake_booleans(
+  LLVM_ENABLE_ZLIB)
+
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)
diff --git a/lit/lit.cfg b/lit/lit.cfg
index 402d03947ca8..4a190a7328ba 100644
--- a/lit/lit.cfg
+++ b/lit/lit.cfg
@@ -91,11 +91,11 @@ for pattern in [r"\bFileCheck\b",
                           pattern)
     tool_pipe = tool_match.group(2)
     tool_name = tool_match.group(4)
-    tool_path = lit.util.which(tool_name, config.llvm_tools_dir)
+    tool_path = lit.util.which(tool_name, config.environment['PATH'])
     if not tool_path:
         # Warn, but still provide a substitution.
         lit_config.note(
-            'Did not find ' + tool_name + ' in ' + config.llvm_tools_dir)
+            'Did not find ' + tool_name + ' in ' + config.environment['PATH'])
     config.substitutions.append((pattern, tool_pipe + tool_path))
 
 # Shell execution
diff --git a/lit/lit.site.cfg.in b/lit/lit.site.cfg.in
index 2cfa677651a1..c6550877751f 100644
--- a/lit/lit.site.cfg.in
+++ b/lit/lit.site.cfg.in
@@ -12,7 +12,7 @@ config.target_triple = "@TARGET_TRIPLE@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
 config.cc = "@LLDB_TEST_C_COMPILER@"
 config.cxx = "@LLDB_TEST_CXX_COMPILER@"
-config.have_zlib = @HAVE_LIBZ@
+config.have_zlib = @LLVM_ENABLE_ZLIB@
 
 # Support substitution of the tools and libs dirs with user parameters. This is
 # used when we can't determine the tool dir at configuration time.