From 823f87a1f80708ce9ba318cc0b07632e76b74d43 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 1 Feb 2017 21:34:47 +0000
Subject: [PATCH 1/4] Vendor import of llvm release_40 branch r293807:
 https://llvm.org/svn/llvm-project/llvm/branches/release_40@293807

---
 cmake/modules/DetermineGCCCompatible.cmake    |   2 +
 include/llvm/CodeGen/AsmPrinter.h             |   6 +
 include/llvm/CodeGen/SelectionDAGISel.h       |   2 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp         |   9 +
 lib/CodeGen/AsmPrinter/DIE.cpp                |   2 +-
 lib/CodeGen/InterleavedAccessPass.cpp         |   8 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp |  12 +-
 lib/MC/MCMachOStreamer.cpp                    |   3 +-
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp   |  17 +-
 lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h   |   4 +-
 lib/Target/Mips/MicroMipsInstrFPU.td          | 107 ++-
 lib/Target/Mips/MicroMipsInstrFormats.td      |   7 +-
 lib/Target/Mips/MipsAsmPrinter.cpp            |  16 +
 lib/Target/Mips/MipsAsmPrinter.h              |   1 +
 lib/Target/Mips/MipsFastISel.cpp              |   4 +-
 lib/Target/Mips/MipsInstrFPU.td               | 211 +++++-
 lib/Target/Mips/MipsInstrFormats.td           |   8 +-
 lib/Target/Mips/MipsTargetObjectFile.cpp      |   8 +
 lib/Target/Mips/MipsTargetObjectFile.h        |   2 +
 lib/Target/PowerPC/PPCInstrInfo.td            |  13 +
 lib/Target/PowerPC/PPCSchedule.td             |   2 +
 lib/Target/PowerPC/PPCScheduleE500mc.td       |   8 +
 lib/Target/PowerPC/PPCScheduleE5500.td        |  10 +-
 .../InstCombine/InstCombineCompares.cpp       |   4 +
 .../InstCombineLoadStoreAlloca.cpp            |   3 +-
 lib/Transforms/Scalar/SCCP.cpp                |   5 +-
 ...rch64-interleaved-accesses-extract-user.ll |  86 ---
 .../AArch64/aarch64-interleaved-accesses.ll   | 393 -----------
 .../arm-interleaved-accesses-extract-user.ll  |  86 ---
 test/CodeGen/ARM/arm-interleaved-accesses.ll  | 462 -------------
 test/CodeGen/SystemZ/pr31710.ll               |  39 ++
 test/DebugInfo/Mips/tls.ll                    |  22 +
 .../Mips/mips4/valid-xfail-mips4.txt          |  38 --
 .../PowerPC/ppc64-encoding-bookIII.txt        |   9 +
 test/MC/MachO/ARM/no-tls-assert.ll            |  28 +
 test/MC/Mips/micromips/valid.s                |  36 +
 .../MC/Mips/mips1/invalid-mips4-wrong-error.s |   4 +-
 test/MC/Mips/mips1/invalid-mips4.s            |  14 +-
 .../MC/Mips/mips1/invalid-mips5-wrong-error.s |  32 +
 test/MC/Mips/mips1/invalid-mips5.s            |  14 +-
 test/MC/Mips/mips2/invalid-mips32.s           |  46 +-
 test/MC/Mips/mips2/invalid-mips32r2.s         |  46 +-
 .../MC/Mips/mips2/invalid-mips4-wrong-error.s |   4 +-
 test/MC/Mips/mips2/invalid-mips4.s            |  14 +-
 test/MC/Mips/mips2/invalid-mips5.s            |  47 +-
 .../MC/Mips/mips3/invalid-mips4-wrong-error.s |  10 -
 test/MC/Mips/mips3/invalid-mips4.s            |  46 +-
 .../MC/Mips/mips3/invalid-mips5-wrong-error.s |   1 +
 test/MC/Mips/mips3/invalid-mips5.s            |  47 +-
 test/MC/Mips/mips32/valid-xfail.s             |  44 +-
 test/MC/Mips/mips32/valid.s                   |  36 +-
 test/MC/Mips/mips32r2/valid-xfail.s           |  28 -
 test/MC/Mips/mips32r2/valid.s                 |  36 +-
 test/MC/Mips/mips32r3/valid-xfail.s           |  28 -
 test/MC/Mips/mips32r3/valid.s                 |  36 +-
 test/MC/Mips/mips32r5/valid-xfail.s           |  28 -
 test/MC/Mips/mips32r5/valid.s                 |  36 +-
 test/MC/Mips/mips4/valid-xfail.s              |  44 +-
 test/MC/Mips/mips4/valid.s                    |  36 +-
 test/MC/Mips/mips5/valid-xfail.s              |  28 -
 test/MC/Mips/mips5/valid.s                    |  36 +-
 test/MC/Mips/mips64/valid-xfail.s             |  28 -
 test/MC/Mips/mips64/valid.s                   |  36 +-
 test/MC/Mips/mips64r2/valid-xfail.s           |  28 -
 test/MC/Mips/mips64r2/valid.s                 |  36 +-
 test/MC/Mips/mips64r3/valid-xfail.s           |  28 -
 test/MC/Mips/mips64r3/valid.s                 |  36 +-
 test/MC/Mips/mips64r5/valid-xfail.s           |  28 -
 test/MC/Mips/mips64r5/valid.s                 |  36 +-
 test/MC/PowerPC/ppc64-encoding-bookIII.s      |  13 +
 .../Transforms/IPConstantProp/naked-return.ll |  28 +
 .../InstCombine/indexed-gep-compares.ll       |  17 +
 test/Transforms/InstCombine/load.ll           |  19 +
 .../interleaved-accesses-extract-user.ll      | 113 +++
 .../AArch64/interleaved-accesses.ll           | 567 +++++++++++++++
 .../InterleavedAccess/AArch64/lit.local.cfg   |   2 +
 .../ARM/interleaved-accesses-extract-user.ll  | 113 +++
 .../ARM/interleaved-accesses.ll               | 646 ++++++++++++++++++
 .../InterleavedAccess/ARM/lit.local.cfg       |   2 +
 test/tools/llvm-symbolizer/coff-exports.test  |   4 +-
 80 files changed, 2683 insertions(+), 1511 deletions(-)
 delete mode 100644 test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
 delete mode 100644 test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
 delete mode 100644 test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
 delete mode 100644 test/CodeGen/ARM/arm-interleaved-accesses.ll
 create mode 100644 test/CodeGen/SystemZ/pr31710.ll
 create mode 100644 test/DebugInfo/Mips/tls.ll
 delete mode 100644 test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
 create mode 100644 test/MC/MachO/ARM/no-tls-assert.ll
 delete mode 100644 test/MC/Mips/mips3/invalid-mips4-wrong-error.s
 create mode 100644 test/Transforms/IPConstantProp/naked-return.ll
 create mode 100644 test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll
 create mode 100644 test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
 create mode 100644 test/Transforms/InterleavedAccess/AArch64/lit.local.cfg
 create mode 100644 test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll
 create mode 100644 test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
 create mode 100644 test/Transforms/InterleavedAccess/ARM/lit.local.cfg

diff --git a/cmake/modules/DetermineGCCCompatible.cmake b/cmake/modules/DetermineGCCCompatible.cmake
index 1bf15fcba72f..1369ebe9d0e2 100644
--- a/cmake/modules/DetermineGCCCompatible.cmake
+++ b/cmake/modules/DetermineGCCCompatible.cmake
@@ -7,5 +7,7 @@ if(NOT DEFINED LLVM_COMPILER_IS_GCC_COMPATIBLE)
     set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF)
   elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
     set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
+  elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" )
+    set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
   endif()
 endif()
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index f0be955110fb..4daca0347b77 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -480,6 +480,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
   virtual unsigned getISAEncoding() { return 0; }
 
+  /// Emit the directive and value for debug thread local expression
+  ///
+  /// \p Value - The value to emit.
+  /// \p Size - The size of the integer (in bytes) to emit.
+  virtual void EmitDebugValue(const MCExpr *Value, unsigned Size) const;
+
   //===------------------------------------------------------------------===//
   // Dwarf Lowering Routines
   //===------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 7f4549d3058f..61d7ec4ecf5b 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -305,7 +305,7 @@ class SelectionDAGISel : public MachineFunctionPass {
   std::vector<unsigned> OpcodeOffset;
 
   void UpdateChains(SDNode *NodeToMatch, SDValue InputChain,
-                    const SmallVectorImpl<SDNode *> &ChainNodesMatched,
+                    SmallVectorImpl<SDNode *> &ChainNodesMatched,
                     bool isMorphNodeTo);
 };
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9f6caa95a9ed..24fdbfc901fd 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -567,6 +567,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   OutStreamer->AddBlankLine();
 }
 
+/// Emit the directive and value for debug thread local expression
+///
+/// \p Value - The value to emit.
+/// \p Size - The size of the integer (in bytes) to emit.
+void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+                                      unsigned Size) const {
+  OutStreamer->EmitValue(Value, Size);
+}
+
 /// EmitFunctionHeader - This method emits the header for the current
 /// function.
 void AsmPrinter::EmitFunctionHeader() {
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index a8a3b30d5b60..879918995472 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -484,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const {
 /// EmitValue - Emit expression value.
 ///
 void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
-  AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
+  AP->EmitDebugValue(Expr, SizeOf(AP, Form));
 }
 
 /// SizeOf - Determine size of expression value in bytes.
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index c8f79d7fb71c..ec35b3f6449e 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
 /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
 static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned MaxFactor) {
+                               unsigned MaxFactor, unsigned OpNumElts) {
   unsigned NumElts = Mask.size();
   if (NumElts < 4)
     return false;
@@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 
       if (StartMask < 0)
         break;
+      // We must stay within the vectors; This case can happen with undefs.
+      if (StartMask + LaneLen > OpNumElts*2)
+        break;
     }
 
     // Found an interleaved mask of current factor.
@@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(
 
   // Check if the shufflevector is RE-interleave shuffle.
   unsigned Factor;
-  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+  unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
     return false;
 
   DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6d717b44eb72..004fa703c192 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2248,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
 /// to use the new results.
 void SelectionDAGISel::UpdateChains(
     SDNode *NodeToMatch, SDValue InputChain,
-    const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+    SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
   SmallVector<SDNode*, 4> NowDeadNodes;
 
   // Now that all the normal results are replaced, we replace the chain and
@@ -2260,6 +2260,11 @@ void SelectionDAGISel::UpdateChains(
     // Replace all the chain results with the final chain we ended up with.
     for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
       SDNode *ChainNode = ChainNodesMatched[i];
+      // If ChainNode is null, it's because we replaced it on a previous
+      // iteration and we cleared it out of the map. Just skip it.
+      if (!ChainNode)
+        continue;
+
       assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
              "Deleted node left in chain");
 
@@ -2272,6 +2277,11 @@ void SelectionDAGISel::UpdateChains(
       if (ChainVal.getValueType() == MVT::Glue)
         ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
       assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+      SelectionDAG::DAGNodeDeletedListener NDL(
+          *CurDAG, [&](SDNode *N, SDNode *E) {
+            std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+                         static_cast<SDNode *>(nullptr));
+          });
       CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
 
       // If the node became dead and we haven't already seen it, delete it.
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 45a497240b4e..bd425bb73093 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -142,7 +142,8 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
   if (SegName == "__TEXT" && SecName == "__eh_frame")
     return true;
 
-  if (SegName == "__DATA" && SecName == "__nl_symbol_ptr")
+  if (SegName == "__DATA" && (SecName == "__nl_symbol_ptr" ||
+                              SecName == "__thread_ptr"))
     return true;
 
   return false;
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97ca11ca501e..d054578deb67 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -413,6 +413,7 @@ class MipsAsmParser : public MCTargetAsmParser {
     Match_RequiresDifferentOperands,
     Match_RequiresNoZeroRegister,
     Match_RequiresSameSrcAndDst,
+    Match_NoFCCRegisterForCurrentISA,
     Match_NonZeroOperandForSync,
 #define GET_OPERAND_DIAGNOSTIC_TYPES
 #include "MipsGenAsmMatcher.inc"
@@ -1461,8 +1462,6 @@ class MipsOperand : public MCParsedAsmOperand {
   bool isFCCAsmReg() const {
     if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
       return false;
-    if (!AsmParser.hasEightFccRegisters())
-      return RegIdx.Index == 0;
     return RegIdx.Index <= 7;
   }
   bool isACCAsmReg() const {
@@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
     return Match_RequiresSameSrcAndDst;
   }
 }
+
 unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
   switch (Inst.getOpcode()) {
   // As described by the MIPSR6 spec, daui must not use the zero operand for
@@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
       return Match_RequiresDifferentOperands;
     return Match_Success;
-  default:
-    return Match_Success;
   }
+
+  uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & MipsII::HasFCCRegOperand) &&
+      (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+    return Match_NoFCCRegisterForCurrentISA;
+
+  return Match_Success;
+
 }
 
 static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return Error(IDLoc, "invalid operand ($zero) for instruction");
   case Match_RequiresSameSrcAndDst:
     return Error(IDLoc, "source and destination must match");
+  case Match_NoFCCRegisterForCurrentISA:
+    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+                 "non-zero fcc register doesn't exist in current ISA level");
   case Match_Immz:
     return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
   case Match_UImm1_0:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 35de7b27bf10..a90db2384c46 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -123,7 +123,9 @@ namespace MipsII {
     HasForbiddenSlot = 1 << 5,
     /// IsPCRelativeLoad - A Load instruction with implicit source register
     ///                    ($pc) with explicit offset and destination register
-    IsPCRelativeLoad = 1 << 6
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+    HasFCCRegOperand = 1 << 7
 
   };
 }
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index fc83761e409b..5600f71ff68e 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
                SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
 
 def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
-                  CEQS_FM_MM<0>;
+                  CEQS_FM_MM<0> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+}
+
 def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
-                  CEQS_FM_MM<1>;
+                  CEQS_FM_MM<1> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+}
 
 def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
               BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
@@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in {
     def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>,
                   LW_FM_MM<0x26>;
   }
+
+  multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt,
+                      InstrItinClass itin> {
+    def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+                    C_COND_FM_MM<fmt, 0> {
+      let BaseOpcode = "c.f."#NAME;
+      let isCommutable = 1;
+    }
+    def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 1> {
+      let BaseOpcode = "c.un."#NAME;
+      let isCommutable = 1;
+    }
+    def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 2> {
+      let BaseOpcode = "c.eq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 3> {
+      let BaseOpcode = "c.ueq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 4> {
+      let BaseOpcode = "c.olt."#NAME;
+    }
+    def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 5> {
+      let BaseOpcode = "c.ult."#NAME;
+    }
+    def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 6> {
+      let BaseOpcode = "c.ole."#NAME;
+    }
+    def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+                       C_COND_FM_MM<fmt, 7> {
+      let BaseOpcode = "c.ule."#NAME;
+    }
+    def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 8> {
+      let BaseOpcode = "c.sf."#NAME;
+      let isCommutable = 1;
+    }
+    def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+                       C_COND_FM_MM<fmt, 9> {
+      let BaseOpcode = "c.ngle."#NAME;
+    }
+    def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 10> {
+      let BaseOpcode = "c.seq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 11> {
+      let BaseOpcode = "c.ngl."#NAME;
+    }
+    def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 12> {
+      let BaseOpcode = "c.lt."#NAME;
+    }
+    def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 13> {
+      let BaseOpcode = "c.nge."#NAME;
+    }
+    def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 14> {
+      let BaseOpcode = "c.le."#NAME;
+    }
+    def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 15> {
+      let BaseOpcode = "c.ngt."#NAME;
+    }
+  }
+
+  defm S   : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>,
+             ISA_MIPS1_NOT_32R6_64R6;
+  defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  let DecoderNamespace = "Mips64" in
+  defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm S_MM   : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6;
+  defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">,
+         ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 8b595f9e6c4c..774976828a0c 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch {
 class CEQS_FM_MM<bits<2> fmt> : MMArch {
   bits<5> fs;
   bits<5> ft;
+  bits<3> fcc;
   bits<4> cond;
 
   bits<32> Inst;
@@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch {
   let Inst{31-26} = 0x15;
   let Inst{25-21} = ft;
   let Inst{20-16} = fs;
-  let Inst{15-13} = 0x0;  // cc
+  let Inst{15-13} = fcc;
   let Inst{12}    = 0;
   let Inst{11-10} = fmt;
   let Inst{9-6}   = cond;
   let Inst{5-0}   = 0x3c;
 }
 
+class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> {
+  let cond = c;
+}
+
 class BC1F_FM_MM<bits<5> tf> : MMArch {
   bits<16> offset;
 
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 179695bc6988..04d6529a073d 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   // TODO: implement
 }
 
+// Emit .dtprelword or .dtpreldword directive
+// and value for debug thread local expression.
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+                                          unsigned Size) const {
+  switch (Size) {
+  case 4:
+    OutStreamer->EmitDTPRel32Value(Value);
+    break;
+  case 8:
+    OutStreamer->EmitDTPRel64Value(Value);
+    break;
+  default:
+    llvm_unreachable("Unexpected size of expression value.");
+  }
+}
+
 // Align all targets of indirect branches on bundle size.  Used only if target
 // is NaCl.
 void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 259e557e1283..c5cf5241c236 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -140,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
   void EmitStartOfAsmFile(Module &M) override;
   void EmitEndOfAsmFile(Module &M) override;
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+  void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
 };
 }
 
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 29f3e2c07e04..a44192f57aa0 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
     unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
     emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
     emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
-    emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
-        Mips::FCC0, RegState::ImplicitDefine);
+    emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg)
+                 .addReg(RightReg);
     emitInst(CondMovOpc, ResultReg)
         .addReg(RegWithOne)
         .addReg(Mips::FCC0)
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index ab7aa9dcdcae..df42d56d041b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
   let isTerminator = 1;
   let hasDelaySlot = DelaySlot;
   let Defs = [AT];
+  let hasFCCRegOperand = 1;
 }
 
 class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
@@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
          !strconcat("c.$cond.", typestr)>, HARDFLOAT {
   let Defs = [FCC0];
   let isCodeGenOnly = 1;
+  let hasFCCRegOperand = 1;
 }
 
+
+// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather
+//       duplicating the instruction definition for MIPS1 - MIPS3, we expand
+//       c.cond.ft if necessary, and reject it after constructing the
+//       instruction if the ISA doesn't support it.
 class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC,
                 InstrItinClass itin>  :
-   InstSE<(outs), (ins RC:$fs, RC:$ft),
-          !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin,
-          FrmFR>, HARDFLOAT;
+   InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft),
+          !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin,
+          FrmFR>, HARDFLOAT {
+  let isCompare = 1;
+  let hasFCCRegOperand = 1;
+}
+
 
 multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
                     InstrItinClass itin> {
-  def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>;
-  def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>;
-  def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>;
-  def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>;
-  def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>;
-  def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>;
-  def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>;
-  def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>;
-  def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>;
-  def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>;
-  def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>;
-  def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>;
-  def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>;
-  def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>;
-  def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>;
-  def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
+  def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+                  C_COND_FM<fmt, 0> {
+    let BaseOpcode = "c.f."#NAME;
+    let isCommutable = 1;
+  }
+  def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 1> {
+    let BaseOpcode = "c.un."#NAME;
+    let isCommutable = 1;
+  }
+  def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 2> {
+    let BaseOpcode = "c.eq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 3> {
+    let BaseOpcode = "c.ueq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 4> {
+    let BaseOpcode = "c.olt."#NAME;
+  }
+  def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 5> {
+    let BaseOpcode = "c.ult."#NAME;
+  }
+  def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 6> {
+    let BaseOpcode = "c.ole."#NAME;
+  }
+  def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+                     C_COND_FM<fmt, 7> {
+    let BaseOpcode = "c.ule."#NAME;
+  }
+  def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 8> {
+    let BaseOpcode = "c.sf."#NAME;
+    let isCommutable = 1;
+  }
+  def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+                     C_COND_FM<fmt, 9> {
+    let BaseOpcode = "c.ngle."#NAME;
+  }
+  def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 10> {
+    let BaseOpcode = "c.seq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 11> {
+    let BaseOpcode = "c.ngl."#NAME;
+  }
+  def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 12> {
+    let BaseOpcode = "c.lt."#NAME;
+  }
+  def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 13> {
+    let BaseOpcode = "c.nge."#NAME;
+  }
+  def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 14> {
+    let BaseOpcode = "c.le."#NAME;
+  }
+  def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 15> {
+    let BaseOpcode = "c.ngt."#NAME;
+  }
 }
 
+let AdditionalPredicates = [NotInMicroMips] in {
 defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
 defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
            FGR_32;
 let DecoderNamespace = "Mips64" in
 defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
            FGR_64;
-
+}
 //===----------------------------------------------------------------------===//
 // Floating Point Instructions
 //===----------------------------------------------------------------------===//
@@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
 /// Floating Point Compare
 let AdditionalPredicates = [NotInMicroMips] in {
   def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
-                 ISA_MIPS1_NOT_32R6_64R6;
+                 ISA_MIPS1_NOT_32R6_64R6 {
+
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+  }
   def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
-                 ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+                 ISA_MIPS1_NOT_32R6_64R6, FGR_32 {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+  }
 }
 let DecoderNamespace = "Mips64" in
 def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
-               ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+               ISA_MIPS1_NOT_32R6_64R6, FGR_64 {
+  // FIXME: This is a required to work around the fact that thiese instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+}
 
 //===----------------------------------------------------------------------===//
 // Floating Point Pseudo-Instructions
@@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
 //===----------------------------------------------------------------------===//
 // InstAliases.
 //===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
-      ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
-      ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
-      ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
-      ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-
 def : MipsInstAlias
         <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
       ISA_MIPS2, HARDFLOAT;
@@ -630,6 +703,80 @@ def : MipsInstAlias
 def : MipsInstAlias
         <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
       FGR_64, ISA_MIPS2, HARDFLOAT;
+
+multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> {
+  def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_F_"#NAME) FCC0,
+                                                       RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_UN_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_EQ_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_UEQ_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_OLT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_ULT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_OLE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_ULE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_SF_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGLE_"#NAME) FCC0,
+                                                          RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_SEQ_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGL_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_LT_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_LE_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+}
+
+multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString,
+                       Instruction BCFalse, string BCFalseString> {
+  def : MipsInstAlias<!strconcat(BCTrueString, " $offset"),
+                                (BCTrue FCC0, brtarget:$offset), 1>;
+
+  def : MipsInstAlias<!strconcat(BCFalseString, " $offset"),
+                                (BCFalse FCC0, brtarget:$offset), 1>;
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+  defm S   : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6;
+  defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6,
+         HARDFLOAT;
+  defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6,
+         HARDFLOAT;
+}
 //===----------------------------------------------------------------------===//
 // Floating Point Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1437fb75434a..817d9b44b9c2 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
   bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
                             // ($pc) and with explicit offset and destination
                             // register
+  bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is
+                            // present in MIPS-I to MIPS-III.
 
-  // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+  // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h.
   let TSFlags{3-0}   = FormBits;
   let TSFlags{4}     = isCTI;
   let TSFlags{5}     = hasForbiddenSlot;
   let TSFlags{6}     = IsPCRelativeLoad;
+  let TSFlags{7}     = hasFCCRegOperand;
 
   let DecoderNamespace = "Mips";
 
@@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch {
 class CEQS_FM<bits<5> fmt> : StdArch {
   bits<5> fs;
   bits<5> ft;
+  bits<3> fcc;
   bits<4> cond;
 
   bits<32> Inst;
@@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch {
   let Inst{25-21} = fmt;
   let Inst{20-16} = ft;
   let Inst{15-11} = fs;
-  let Inst{10-8} = 0; // cc
+  let Inst{10-8} = fcc;
   let Inst{7-4} = 0x3;
   let Inst{3-0} = cond;
 }
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index fadab7806120..c5d6a05d6611 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL,
   // Otherwise, we work the same as ELF.
   return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
 }
+
+const MCExpr *
+MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+  const MCExpr *Expr =
+      MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+  return MCBinaryExpr::createAdd(
+      Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+}
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index e5423f9578a8..a37ec154ff79 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -42,6 +42,8 @@ class MipsTargetMachine;
     MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
                                      const Constant *C,
                                      unsigned &Align) const override;
+    /// Describe a TLS variable address within debug info.
+    const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 90111bbea07d..f615cc7cc974 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
                       PPC970_DGroup_Single;
 } // hasSideEffects = 0
 
+def ICBLC  : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>;
+def ICBLQ  : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
 def ICBT  : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
                        "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
 
 def : Pat<(int_ppc_dcbt xoaddr:$dst),
           (DCBT 0, xoaddr:$dst)>;
@@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
 def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
                      "mftb $RT, $SPR", IIC_SprMFTB>;
 
+def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR),
+                     "mfpmr $RT, $SPR", IIC_SprMFPMR>;
+
+def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
+                     "mtpmr $SPR, $RT", IIC_SprMTPMR>;
+
+
 // A pseudo-instruction used to implement the read of the 64-bit cycle counter
 // on a 32-bit target.
 let hasSideEffects = 1, usesCustomInserter = 1 in
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index edabe7748673..d240529bc731 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -118,6 +118,8 @@ def IIC_SprTLBIE     : InstrItinClass;
 def IIC_SprABORT     : InstrItinClass;
 def IIC_SprMSGSYNC   : InstrItinClass;
 def IIC_SprSTOP      : InstrItinClass;
+def IIC_SprMFPMR     : InstrItinClass;
+def IIC_SprMTPMR     : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index f687d326b52d..15d5991b938c 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
                                   InstrStage<5, [E500_SFX0]>],
                                  [8, 1],
                                  [E500_GPR_Bypass, E500_CR_Bypass]>,
+  InstrItinData<IIC_SprMFPMR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<4, [E500_SFX0]>],
+                                 [7, 1], // Latency = 4, Repeat rate = 4
+                                 [E500_GPR_Bypass, E500_GPR_Bypass]>,
   InstrItinData<IIC_SprMFMSR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
                                   InstrStage<4, [E500_SFX0]>],
                                  [7, 1], // Latency = 4, Repeat rate = 4
@@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
                                   InstrStage<1, [E500_SFX0, E500_SFX1]>],
                                  [4, 1], // Latency = 1, Repeat rate = 1
                                  [E500_GPR_Bypass, E500_CR_Bypass]>,
+  InstrItinData<IIC_SprMTPMR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SFX0]>],
+                                 [4, 1], // Latency = 1, Repeat rate = 1
+                                 [E500_CR_Bypass, E500_GPR_Bypass]>,
   InstrItinData<IIC_SprMFTB,     [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
                                   InstrStage<4, [E500_SFX0]>],
                                  [7, 1], // Latency = 4, Repeat rate = 4
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index 5db886cf8f94..32f8e652dd56 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries<
                                   InstrStage<5, [E5500_CFX_0]>],
                                  [9, 2], // Latency = 5, Repeat rate = 5
                                  [E5500_GPR_Bypass, E5500_CR_Bypass]>,
-  InstrItinData<IIC_SprMFMSR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
-                                  InstrStage<4, [E5500_SFX0]>],
+  InstrItinData<IIC_SprMFPMR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<4, [E5500_CFX_0]>],
                                  [8, 2], // Latency = 4, Repeat rate = 4
                                  [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
   InstrItinData<IIC_SprMFSPR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
                                   InstrStage<1, [E5500_CFX_0]>],
                                  [5], // Latency = 1, Repeat rate = 1
                                  [E5500_GPR_Bypass]>,
+  InstrItinData<IIC_SprMTPMR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<1, [E5500_CFX_0]>],
+                                 [5], // Latency = 1, Repeat rate = 1
+                                 [E5500_GPR_Bypass]>,
   InstrItinData<IIC_SprMFTB,     [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
                                   InstrStage<4, [E5500_CFX_0]>],
                                  [8, 2], // Latency = 4, Repeat rate = 4
                                  [NoBypass, E5500_GPR_Bypass]>,
   InstrItinData<IIC_SprMTSPR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
-                                  InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+                                  InstrStage<1, [E5500_CFX_0]>],
                                  [5], // Latency = 1, Repeat rate = 1
                                  [E5500_GPR_Bypass]>,
   InstrItinData<IIC_FPGeneral,   [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 013159cde774..bca1304534b6 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -884,6 +884,10 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
   if (!GEPLHS->hasAllConstantIndices())
     return nullptr;
 
+  // Make sure the pointers have the same type.
+  if (GEPLHS->getType() != RHS->getType())
+    return nullptr;
+
   Value *PtrBase, *Index;
   std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
 
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 388c5e4e7fa4..49e516e9c176 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -502,7 +502,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
       !DL.isNonIntegralPointerType(Ty)) {
     if (all_of(LI.users(), [&LI](User *U) {
           auto *SI = dyn_cast<StoreInst>(U);
-          return SI && SI->getPointerOperand() != &LI;
+          return SI && SI->getPointerOperand() != &LI &&
+                 !SI->getPointerOperand()->isSwiftError();
         })) {
       LoadInst *NewLoad = combineLoadToNewType(
           IC, LI,
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 34be90692481..ede381c4c243 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1705,7 +1705,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
 
     // If this is an exact definition of this function, then we can propagate
     // information about its result into callsites of it.
-    if (F.hasExactDefinition())
+    // Don't touch naked functions. They may contain asm returning a
+    // value we don't see, so we may end up interprocedurally propagating
+    // the return value incorrectly.
+    if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked))
       Solver.AddTrackedFunction(&F);
 
     // If this function only has direct calls that we can see, we can track its
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
deleted file mode 100644
index 8628c4288c69..000000000000
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt < %s -mtriple=aarch64 -interleaved-access -S | FileCheck %s
-
-; CHECK-LABEL: @extract_user_basic(
-; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi(
-; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br label %if.merge
-
-if.merge:
-  %E2 = extractelement <8 x i32> %L, i32 2
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi_no_dom(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E2 = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_wrong_const_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_wrong_const_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 1
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_undef_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_undef_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 undef
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_var_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 %I
-  ret void
-}
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
deleted file mode 100644
index 347305abb67a..000000000000
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ /dev/null
@@ -1,393 +0,0 @@
-; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
-; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
-
-; NEON-LABEL: load_factor2:
-; NEON: ld2 { v0.8b, v1.8b }, [x0]
-; NONEON-LABEL: load_factor2:
-; NONEON-NOT: ld2
-define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
-  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
-  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-  %add = add nsw <8 x i8> %strided.v0, %strided.v1
-  ret <8 x i8> %add
-}
-
-; NEON-LABEL: load_factor3:
-; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: load_factor3:
-; NONEON-NOT: ld3
-define <4 x i32> @load_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_factor4:
-; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: load_factor4:
-; NONEON-NOT: ld4
-define <4 x i32> @load_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_factor2:
-; NEON: st2 { v0.8b, v1.8b }, [x0]
-; NONEON-LABEL: store_factor2:
-; NONEON-NOT: st2
-define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
-  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor3:
-; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: store_factor3:
-; NONEON-NOT: st3
-define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor4:
-; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: store_factor4:
-; NONEON-NOT: st4
-define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following cases test that interleaved access of pointer vectors can be
-; matched to ldN/stN instruction.
-
-; NEON-LABEL: load_ptrvec_factor2:
-; NEON: ld2 { v0.2d, v1.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor2:
-; NONEON-NOT: ld2
-define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
-  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
-  ret <2 x i32*> %strided.v0
-}
-
-; NEON-LABEL: load_ptrvec_factor3:
-; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor3:
-; NONEON-NOT: ld3
-define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
-  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
-  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: load_ptrvec_factor4:
-; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor4:
-; NONEON-NOT: ld4
-define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <8 x i32*>*
-  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
-  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
-  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
-  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor2:
-; NEON: st2 { v0.2d, v1.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor2:
-; NONEON-NOT: st2
-define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor3:
-; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor3:
-; NONEON-NOT: st3
-define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor4:
-; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor4:
-; NONEON-NOT: st4
-define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
-  %base = bitcast i32* %ptr to <8 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
-  ret void
-}
-
-; Following cases check that shuffle maskes with undef indices can be matched
-; into ldN/stN instruction.
-
-; NEON-LABEL: load_undef_mask_factor2:
-; NEON: ld2 { v0.4s, v1.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor2:
-; NONEON-NOT: ld2
-define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
-  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
-  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor3:
-; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor3:
-; NONEON-NOT: ld3
-define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor4:
-; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor4:
-; NONEON-NOT: ld4
-define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_undef_mask_factor2:
-; NEON: st2 { v0.4s, v1.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor2:
-; NONEON-NOT: st2
-define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
-  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor3:
-; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor3:
-; NONEON-NOT: st3
-define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor4:
-; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor4:
-; NONEON-NOT: st4
-define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; Check that we do something sane with illegal types.
-
-; NEON-LABEL: load_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
-; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
-; NEON-NEXT: ret
-; NONEON-LABEL: load_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: ldr s0, [x0]
-; NONEON-NEXT: ldr s1, [x0, #8]
-; NONEON-NEXT: ret
-define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
-  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
-  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  ret <3 x float> %tmp2
-}
-
-; NEON-LABEL: store_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
-; NEON-NEXT: st1 { v0.d }[0], [x0]
-; NEON-NEXT: ret
-; NONEON-LABEL: store_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
-; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
-; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
-; NONEON-NEXT: str x[[RES]], [x0]
-; NONEON-NEXT: ret
-define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
-  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  store <3 x float> %tmp1, <3 x float>* %p, align 16
-  ret void
-}
-
-; NEON-LABEL: load_factor2_with_extract_user:
-; NEON: ld2 { v0.4s, v1.4s }, [x0]
-; NEON: mov w0, v0.s[1]
-; NONEON-LABEL: load_factor2_with_extract_user:
-; NONEON-NOT: ld2
-define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
-  %1 = load <8 x i32>, <8 x i32>* %a, align 8
-  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %3 = extractelement <8 x i32> %1, i32 2
-  ret i32 %3
-}
-
-; NEON-LABEL: store_general_mask_factor4:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefbeg:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefbeg:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefend:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefend:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmid:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefmid:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmulti:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefmulti:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undef_fail:
-; NEON-NOT: st3
-; NONEON-LABEL: store_general_mask_factor3_undef_fail:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undeflane:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3_undeflane:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_negativestart:
-; NEON-NOT: st3
-; NONEON-LABEL: store_general_mask_factor3_negativestart:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_negativestart(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
diff --git a/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll b/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
deleted file mode 100644
index 620cb6356411..000000000000
--- a/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt < %s -mtriple=arm-eabi -mattr=+neon -interleaved-access -S | FileCheck %s
-
-; CHECK-LABEL: @extract_user_basic(
-; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi(
-; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br label %if.merge
-
-if.merge:
-  %E2 = extractelement <8 x i32> %L, i32 2
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi_no_dom(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E2 = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_wrong_const_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_wrong_const_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 1
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_undef_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_undef_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 undef
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_var_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 %I
-  ret void
-}
diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll
deleted file mode 100644
index 4bd60bc19cf5..000000000000
--- a/test/CodeGen/ARM/arm-interleaved-accesses.ll
+++ /dev/null
@@ -1,462 +0,0 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
-; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
-
-; NEON-LABEL: load_factor2:
-; NEON: vld2.8 {d16, d17}, [r0]
-; NONEON-LABEL: load_factor2:
-; NONEON-NOT: vld2
-define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
-  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
-  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-  %add = add nsw <8 x i8> %strided.v0, %strided.v1
-  ret <8 x i8> %add
-}
-
-; NEON-LABEL: load_factor3:
-; NEON: vld3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: load_factor3:
-; NONEON-NOT: vld3
-define <2 x i32> @load_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <6 x i32>*
-  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
-  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
-  %add = add nsw <2 x i32> %strided.v2, %strided.v1
-  ret <2 x i32> %add
-}
-
-; NEON-LABEL: load_factor4:
-; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: load_factor4:
-; NONEON-NOT: vld4
-define <4 x i32> @load_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_factor2:
-; NEON: vst2.8 {d16, d17}, [r0]
-; NONEON-LABEL: store_factor2:
-; NONEON-NOT: vst2
-define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
-  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor3:
-; NEON: vst3.32 {d16, d18, d20}, [r0]!
-; NEON: vst3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: store_factor3:
-; NONEON-NOT: vst3.32
-define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor4:
-; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: store_factor4:
-; NONEON-NOT: vst4
-define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following cases test that interleaved access of pointer vectors can be
-; matched to ldN/stN instruction.
-
-; NEON-LABEL: load_ptrvec_factor2:
-; NEON: vld2.32 {d16, d17}, [r0]
-; NONEON-LABEL: load_ptrvec_factor2:
-; NONEON-NOT: vld2
-define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
-  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
-  ret <2 x i32*> %strided.v0
-}
-
-; NEON-LABEL: load_ptrvec_factor3:
-; NEON: vld3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: load_ptrvec_factor3:
-; NONEON-NOT: vld3
-define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
-  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
-  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: load_ptrvec_factor4:
-; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: load_ptrvec_factor4:
-; NONEON-NOT: vld4
-define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <8 x i32*>*
-  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
-  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
-  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
-  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor2:
-; NEON: vst2.32 {d16, d17}, [r0]
-; NONEON-LABEL: store_ptrvec_factor2:
-; NONEON-NOT: vst2
-define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor3:
-; NEON: vst3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: store_ptrvec_factor3:
-; NONEON-NOT: vst3
-define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor4:
-; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: store_ptrvec_factor4:
-; NONEON-NOT: vst4
-define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
-  %base = bitcast i32* %ptr to <8 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
-  ret void
-}
-
-; Following cases check that shuffle maskes with undef indices can be matched
-; into ldN/stN instruction.
-
-; NEON-LABEL: load_undef_mask_factor2:
-; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: load_undef_mask_factor2:
-; NONEON-NOT: vld2
-define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
-  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
-  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor3:
-; NEON: vld3.32 {d16, d18, d20}, [r0]!
-; NEON: vld3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: load_undef_mask_factor3:
-; NONEON-NOT: vld3
-define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor4:
-; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: load_undef_mask_factor4:
-; NONEON-NOT: vld4
-define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_undef_mask_factor2:
-; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: store_undef_mask_factor2:
-; NONEON-NOT: vst2
-define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
-  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor3:
-; NEON: vst3.32 {d16, d18, d20}, [r0]!
-; NEON: vst3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: store_undef_mask_factor3:
-; NONEON-NOT: vst3
-define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor4:
-; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: store_undef_mask_factor4:
-; NONEON-NOT: vst4
-define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following test cases check that address spaces are properly handled
-
-; NEON-LABEL: load_address_space
-; NEON: vld3.32
-; NONEON-LABEL: load_address_space
-; NONEON-NOT: vld3
-define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
- %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
- %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
- store <2 x i32> %interleaved, <2 x i32>* %B
- ret void
-}
-
-; NEON-LABEL: store_address_space
-; NEON: vst2.32
-; NONEON-LABEL: store_address_space
-; NONEON-NOT: vst2
-define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
- %tmp0 = load <2 x i32>, <2 x i32>* %A
- %tmp1 = load <2 x i32>, <2 x i32>* %B
- %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
- store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
- ret void
-}
-
-; Check that we do something sane with illegal types.
-
-; NEON-LABEL: load_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
-; NEON-NEXT: vuzp.32 q8, {{.*}}
-; NEON-NEXT: vmov r0, r1, d16
-; NEON-NEXT: vmov r2, r3, {{.*}}
-; NEON-NEXT: mov pc, lr
-; NONEON-LABEL: load_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
-; NONEON-NEXT: ldr r1, [r0, #8]
-; NONEON-NEXT: mov r0, [[ELT0]]
-; NONEON-NEXT: mov pc, lr
-define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
-  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
-  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  ret <3 x float> %tmp2
-}
-
-; This lowering isn't great, but it's at least correct.
-
-; NEON-LABEL: store_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: vldr d17, [sp]
-; NEON-NEXT: vmov d16, r2, r3
-; NEON-NEXT: vuzp.32 q8, {{.*}}
-; NEON-NEXT: vstr d16, [r0]
-; NEON-NEXT: mov pc, lr
-; NONEON-LABEL: store_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: stm r0, {r1, r3}
-; NONEON-NEXT: mov pc, lr
-define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
-  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  store <3 x float> %tmp1, <3 x float>* %p, align 16
-  ret void
-}
-
-; NEON-LABEL: load_factor2_with_extract_user:
-; NEON: vld2.32 {d16, d17, d18, d19}, [r0:64]
-; NEON: vmov.32 r0, d16[1]
-; NONEON-LABEL: load_factor2_with_extract_user:
-; NONEON-NOT: vld2
-define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
-  %1 = load <8 x i32>, <8 x i32>* %a, align 8
-  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %3 = extractelement <8 x i32> %1, i32 2
-  ret i32 %3
-}
-
-; NEON-LABEL: store_general_mask_factor4:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefbeg:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefbeg:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefend:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefend:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmid:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefmid:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmulti:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefmulti:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undef_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_undef_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undeflane:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_undeflane:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_endstart_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_endstart_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_endstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_endstart_pass:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_endstart_pass:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_endstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_midstart_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_midstart_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_midstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_midstart_pass:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_midstart_pass:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_midstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
diff --git a/test/CodeGen/SystemZ/pr31710.ll b/test/CodeGen/SystemZ/pr31710.ll
new file mode 100644
index 000000000000..1123827fd939
--- /dev/null
+++ b/test/CodeGen/SystemZ/pr31710.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=s390x-redhat-linux | FileCheck %s
+;
+; Triggers a path in SelectionDAG's UpdateChains where a node is
+; deleted but we try to read it later (pr31710), invoking UB in
+; release mode or hitting an assert if they're enabled.
+
+; CHECK: btldata:
+define void @btldata(i64* %u0, i32** %p0, i32** %p1, i32** %p3, i32** %p5, i32** %p7) {
+entry:
+  %x0 = load i32*, i32** %p0, align 8, !tbaa !0
+  store i64 0, i64* %u0, align 8, !tbaa !4
+  %x1 = load i32*, i32** %p1, align 8, !tbaa !0
+  %x2 = load i32, i32* %x1, align 4, !tbaa !6
+  %x2ext = sext i32 %x2 to i64
+  store i32 %x2, i32* %x1, align 4, !tbaa !6
+  %x3 = load i32*, i32** %p3, align 8, !tbaa !0
+  %ptr = getelementptr inbounds i32, i32* %x3, i64 %x2ext
+  %x4 = load i32, i32* %ptr, align 4, !tbaa !6
+  %x4inc = add nsw i32 %x4, 1
+  store i32 %x4inc, i32* %ptr, align 4, !tbaa !6
+  store i64 undef, i64* %u0, align 8, !tbaa !4
+  %x5 = load i32*, i32** %p5, align 8, !tbaa !0
+  %x6 = load i32, i32* %x5, align 4, !tbaa !6
+  store i32 %x6, i32* %x5, align 4, !tbaa !6
+  %x7 = load i32*, i32** %p7, align 8, !tbaa !0
+  %x8 = load i32, i32* %x7, align 4, !tbaa !6
+  %x8inc = add nsw i32 %x8, 1
+  store i32 %x8inc, i32* %x7, align 4, !tbaa !6
+  ret void
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"any pointer", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"long", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !2, i64 0}
diff --git a/test/DebugInfo/Mips/tls.ll b/test/DebugInfo/Mips/tls.ll
new file mode 100644
index 000000000000..66867da20c3e
--- /dev/null
+++ b/test/DebugInfo/Mips/tls.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm < %s | FileCheck %s -check-prefix=CHECK-WORD
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm < %s | FileCheck %s -check-prefix=CHECK-DWORD
+
+@x = thread_local global i32 5, align 4, !dbg !0
+
+; CHECK-WORD: .dtprelword x+32768
+; CHECK-DWORD: .dtpreldword x+32768
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = !DIGlobalVariableExpression(var: !1)
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 4.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "tls.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 4.0.0"}
diff --git a/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
deleted file mode 100644
index 1b669389e7b4..000000000000
--- a/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
-# XFAIL: *
-0x46 0x2f 0x79 0x32 # CHECK: c.eq.d $fcc1, $f15, $f15
-0x46 0x11 0xc5 0x32 # CHECK: c.eq.s $fcc5, $f24, $f17
-0x46 0x35 0x5c 0x30 # CHECK: c.f.d $fcc4, $f11, $f21
-0x46 0x07 0xf4 0x30 # CHECK: c.f.s $fcc4, $f30, $f7
-0x46 0x21 0x94 0x3e # CHECK: c.le.d $fcc4, $f18, $f1
-0x46 0x04 0xc6 0x3e # CHECK: c.le.s $fcc6, $f24, $f4
-0x46 0x23 0x4b 0x3c # CHECK: c.lt.d $fcc3, $f9, $f3
-0x46 0x0e 0x8a 0x3c # CHECK: c.lt.s $fcc2, $f17, $f14
-0x46 0x30 0xad 0x3d # CHECK: c.nge.d $fcc5, $f21, $f16
-0x46 0x08 0x5b 0x3d # CHECK: c.nge.s $fcc3, $f11, $f8
-0x46 0x17 0xfa 0x3b # CHECK: c.ngl.s $fcc2, $f31, $f23
-0x46 0x17 0x92 0x39 # CHECK: c.ngle.s $fcc2, $f18, $f23
-0x46 0x27 0xc4 0x3f # CHECK: c.ngt.d $fcc4, $f24, $f7
-0x46 0x0d 0x45 0x3f # CHECK: c.ngt.s $fcc5, $f8, $f13
-0x46 0x3f 0x82 0x36 # CHECK: c.ole.d $fcc2, $f16, $f31
-0x46 0x14 0x3b 0x36 # CHECK: c.ole.s $fcc3, $f7, $f20
-0x46 0x3c 0x9c 0x34 # CHECK: c.olt.d $fcc4, $f19, $f28
-0x46 0x07 0xa6 0x34 # CHECK: c.olt.s $fcc6, $f20, $f7
-0x46 0x27 0xfc 0x3a # CHECK: c.seq.d $fcc4, $f31, $f7
-0x46 0x19 0x0f 0x3a # CHECK: c.seq.s $fcc7, $f1, $f25
-0x46 0x39 0x6c 0x33 # CHECK: c.ueq.d $fcc4, $f13, $f25
-0x46 0x1e 0x1e 0x33 # CHECK: c.ueq.s $fcc6, $f3, $f30
-0x46 0x32 0xcf 0x37 # CHECK: c.ule.d $fcc7, $f25, $f18
-0x46 0x1e 0xaf 0x37 # CHECK: c.ule.s $fcc7, $f21, $f30
-0x46 0x31 0x36 0x35 # CHECK: c.ult.d $fcc6, $f6, $f17
-0x46 0x0a 0xc7 0x35 # CHECK: c.ult.s $fcc7, $f24, $f10
-0x46 0x38 0xbe 0x31 # CHECK: c.un.d $fcc6, $f23, $f24
-0x46 0x04 0xf1 0x31 # CHECK: c.un.s $fcc1, $f30, $f4
-0x4e 0x74 0xd4 0xa1 # CHECK: madd.d $f18, $f19, $f26, $f20
-0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
-0x4c 0x32 0xfa 0xa9 # CHECK: msub.d $f10, $f1, $f31, $f18
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
-0x4d 0x33 0x74 0xb1 # CHECK: nmadd.d $f18, $f9, $f14, $f19
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
index 8602a3daeaa3..ae8efcf211fc 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
@@ -134,3 +134,12 @@
 0x7c 0x0b 0x66 0x24
 # CHECK: tlbsx 11, 12
 0x7c 0x0b 0x67 0x24
+
+# CHECK: mfpmr 5, 400
+0x7c 0xb0 0x62 0x9c
+# CHECK: mtpmr 400, 6
+0x7c 0xd0 0x63 0x9c
+# CHECK: icblc 0, 0, 8
+0x7c 0x00 0x41 0xcc
+# CHECK: icbtls 0, 0, 9
+0x7c 0x00 0x4b 0xcc
diff --git a/test/MC/MachO/ARM/no-tls-assert.ll b/test/MC/MachO/ARM/no-tls-assert.ll
new file mode 100644
index 000000000000..3466d4a5a5ff
--- /dev/null
+++ b/test/MC/MachO/ARM/no-tls-assert.ll
@@ -0,0 +1,28 @@
+; RUN: llc -filetype=obj -o - %s | llvm-objdump -section-headers - | FileCheck %s
+; This should not trigger the "Creating regular section after DWARF" assert.
+; CHECK: __text
+; CHECK: __thread_ptr  00000004
+target triple = "thumbv7-apple-ios9.0.0"
+
+@b = external thread_local global i32
+define i32* @func(i32 %a) !dbg !9 {
+  ret i32* @b
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "r.ii", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{i32 1, !"PIC Level", i32 2}
+!9 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 4, type: !10, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null, !12}
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DILocalVariable(name: "a", arg: 1, scope: !9, file: !1, line: 4, type: !12)
+!14 = !DIExpression()
diff --git a/test/MC/Mips/micromips/valid.s b/test/MC/Mips/micromips/valid.s
index c1efc4d09045..cf19a9596832 100644
--- a/test/MC/Mips/micromips/valid.s
+++ b/test/MC/Mips/micromips/valid.s
@@ -210,6 +210,42 @@ recip.s $f2, $f4            # CHECK: recip.s $f2, $f4       # encoding: [0x54,0x
 recip.d $f2, $f4            # CHECK: recip.d $f2, $f4       # encoding: [0x54,0x44,0x52,0x3b]
 rsqrt.s $f3, $f5            # CHECK: rsqrt.s $f3, $f5       # encoding: [0x54,0x65,0x02,0x3b]
 rsqrt.d $f2, $f4            # CHECK: rsqrt.d $f2, $f4       # encoding: [0x54,0x44,0x42,0x3b]
+c.eq.d   $fcc1, $f14, $f14  # CHECK: c.eq.d   $fcc1, $f14, $f14 # encoding: [0x55,0xce,0x24,0xbc]
+c.eq.s   $fcc5, $f24, $f17  # CHECK: c.eq.s   $fcc5, $f24, $f17 # encoding: [0x56,0x38,0xa0,0xbc]
+c.f.d    $fcc4, $f10, $f20  # CHECK: c.f.d    $fcc4, $f10, $f20 # encoding: [0x56,0x8a,0x84,0x3c]
+c.f.s    $fcc4, $f30, $f7   # CHECK: c.f.s    $fcc4, $f30, $f7  # encoding: [0x54,0xfe,0x80,0x3c]
+c.le.d   $fcc4, $f18, $f0   # CHECK: c.le.d   $fcc4, $f18, $f0  # encoding: [0x54,0x12,0x87,0xbc]
+c.le.s   $fcc6, $f24, $f4   # CHECK: c.le.s   $fcc6, $f24, $f4  # encoding: [0x54,0x98,0xc3,0xbc]
+c.lt.d   $fcc3, $f8, $f2    # CHECK: c.lt.d   $fcc3, $f8, $f2   # encoding: [0x54,0x48,0x67,0x3c]
+c.lt.s   $fcc2, $f17, $f14  # CHECK: c.lt.s   $fcc2, $f17, $f14 # encoding: [0x55,0xd1,0x43,0x3c]
+c.nge.d  $fcc5, $f20, $f16  # CHECK: c.nge.d  $fcc5, $f20, $f16 # encoding: [0x56,0x14,0xa7,0x7c]
+c.nge.s  $fcc3, $f11, $f8   # CHECK: c.nge.s  $fcc3, $f11, $f8  # encoding: [0x55,0x0b,0x63,0x7c]
+c.ngl.s  $fcc2, $f31, $f23  # CHECK: c.ngl.s  $fcc2, $f31, $f23 # encoding: [0x56,0xff,0x42,0xfc]
+c.ngle.s $fcc2, $f18, $f23  # CHECK: c.ngle.s $fcc2, $f18, $f23 # encoding: [0x56,0xf2,0x42,0x7c]
+c.ngl.d  $f28, $f28         # CHECK: c.ngl.d  $f28, $f28        # encoding: [0x57,0x9c,0x06,0xfc]
+c.ngle.d $f0, $f16          # CHECK: c.ngle.d $f0, $f16         # encoding: [0x56,0x00,0x06,0x7c]
+c.ngt.d  $fcc4, $f24, $f6   # CHECK: c.ngt.d  $fcc4, $f24, $f6  # encoding: [0x54,0xd8,0x87,0xfc]
+c.ngt.s  $fcc5, $f8, $f13   # CHECK: c.ngt.s  $fcc5, $f8, $f13  # encoding: [0x55,0xa8,0xa3,0xfc]
+c.ole.d  $fcc2, $f16, $f30  # CHECK: c.ole.d  $fcc2, $f16, $f30 # encoding: [0x57,0xd0,0x45,0xbc]
+c.ole.s  $fcc3, $f7, $f20   # CHECK: c.ole.s  $fcc3, $f7, $f20  # encoding: [0x56,0x87,0x61,0xbc]
+c.olt.d  $fcc4, $f18, $f28  # CHECK: c.olt.d  $fcc4, $f18, $f28 # encoding: [0x57,0x92,0x85,0x3c]
+c.olt.s  $fcc6, $f20, $f7   # CHECK: c.olt.s  $fcc6, $f20, $f7  # encoding: [0x54,0xf4,0xc1,0x3c]
+c.seq.d  $fcc4, $f30, $f6   # CHECK: c.seq.d  $fcc4, $f30, $f6  # encoding: [0x54,0xde,0x86,0xbc]
+c.seq.s  $fcc7, $f1, $f25   # CHECK: c.seq.s  $fcc7, $f1, $f25  # encoding: [0x57,0x21,0xe2,0xbc]
+c.sf.d   $f30, $f0          # CHECK: c.sf.d   $f30, $f0         # encoding: [0x54,0x1e,0x06,0x3c]
+c.sf.s   $f14, $f22         # CHECK: c.sf.s   $f14, $f22        # encoding: [0x56,0xce,0x02,0x3c]
+c.ueq.d  $fcc4, $f12, $f24  # CHECK: c.ueq.d  $fcc4, $f12, $f24 # encoding: [0x57,0x0c,0x84,0xfc]
+c.ueq.s  $fcc6, $f3, $f30   # CHECK: c.ueq.s  $fcc6, $f3, $f30  # encoding: [0x57,0xc3,0xc0,0xfc]
+c.ule.d  $fcc7, $f24, $f18  # CHECK: c.ule.d  $fcc7, $f24, $f18 # encoding: [0x56,0x58,0xe5,0xfc]
+c.ule.s  $fcc7, $f21, $f30  # CHECK: c.ule.s  $fcc7, $f21, $f30 # encoding: [0x57,0xd5,0xe1,0xfc]
+c.ult.d  $fcc6, $f6, $f16   # CHECK: c.ult.d  $fcc6, $f6, $f16  # encoding: [0x56,0x06,0xc5,0x7c]
+c.ult.s  $fcc7, $f24, $f10  # CHECK: c.ult.s  $fcc7, $f24, $f10 # encoding: [0x55,0x58,0xe1,0x7c]
+c.un.d   $fcc6, $f22, $f24  # CHECK: c.un.d   $fcc6, $f22, $f24 # encoding: [0x57,0x16,0xc4,0x7c]
+c.un.s   $fcc1, $f30, $f4   # CHECK: c.un.s   $fcc1, $f30, $f4  # encoding: [0x54,0x9e,0x20,0x7c]
+bc1t 8                      # CHECK: bc1t 8                     # encoding: [0x43,0xa0,0x00,0x04]
+bc1f 16                     # CHECK: bc1f 16                    # encoding: [0x43,0x80,0x00,0x08]
+bc1t $fcc1, 4               # CHECK: bc1t $fcc1, 4              # encoding: [0x43,0xa0,0x00,0x02]
+bc1f $fcc2, -20             # CHECK: bc1f $fcc2, -20            # encoding: [0x43,0x80,0xff,0xf6]
 sync                        # CHECK: sync                   # encoding: [0x00,0x00,0x6b,0x7c]
 sync 0                      # CHECK: sync 0                 # encoding: [0x00,0x00,0x6b,0x7c]
 sync 1                      # CHECK: sync 1                 # encoding: [0x00,0x01,0x6b,0x7c]
diff --git a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
index cc4f56b75957..98f34d857a52 100644
--- a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
@@ -6,8 +6,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldc2      $8,-21181($at)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 16-bit signed offset
         ldc2      $20,-1024($s2)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 16-bit signed offset
         ldl       $24,-4167($24)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips1/invalid-mips4.s b/test/MC/Mips/mips1/invalid-mips4.s
index 4d5a173b157c..e99fb6628e6c 100644
--- a/test/MC/Mips/mips1/invalid-mips4.s
+++ b/test/MC/Mips/mips1/invalid-mips4.s
@@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -53,19 +53,19 @@
         ldxc1     $f8,$s7($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f26,$f20,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
index 18c0b61ff55f..0564c1a250ca 100644
--- a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
@@ -44,3 +44,35 @@
         pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.s    $fcc1, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
diff --git a/test/MC/Mips/mips1/invalid-mips5.s b/test/MC/Mips/mips1/invalid-mips5.s
index 6096c3e87b26..f909c07f15f1 100644
--- a/test/MC/Mips/mips1/invalid-mips5.s
+++ b/test/MC/Mips/mips1/invalid-mips5.s
@@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -52,19 +52,19 @@
         luxc1     $f19,$s6($s5)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$a3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a3     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32.s b/test/MC/Mips/mips2/invalid-mips32.s
index 1e451fd24668..bd02882de395 100644
--- a/test/MC/Mips/mips2/invalid-mips32.s
+++ b/test/MC/Mips/mips2/invalid-mips32.s
@@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         clo       $11,$a1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         clz       $sp,$gp         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         deret                     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -20,19 +20,19 @@
         maddu     $24,$s2         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfc0      $a2,$14,1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -41,3 +41,35 @@
         mtc0      $9,$29,3        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mul       $s0,$s4,$at     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sync      1               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: s-type must be zero or unspecified for pre-MIPS32 ISAs
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
diff --git a/test/MC/Mips/mips2/invalid-mips32r2.s b/test/MC/Mips/mips2/invalid-mips32r2.s
index 6dc8159a9acc..383430d3aa09 100644
--- a/test/MC/Mips/mips2/invalid-mips32r2.s
+++ b/test/MC/Mips/mips2/invalid-mips32r2.s
@@ -5,12 +5,44 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         clo     $t3,$a1             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         clz     $sp,$gp             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.l.d $f24,$f15           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.l.s $f11,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         deret                       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -29,19 +61,19 @@
         mfc0    $a2,$14,1           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhc1   $s8,$f24            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf    $gp,$8,$fcc0        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf    $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf    $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d  $f6,$f11,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s  $f23,$f5,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn    $v1,$s1,$s0         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d  $f27,$f21,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s  $f12,$f0,$s7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt    $zero,$s4,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d  $f0,$f2,$fcc0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s  $f30,$f2,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz    $a1,$s6,$t1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d  $f12,$f29,$t1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s  $f25,$f7,$v1        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
index ef5657833ca5..5ced993c2e11 100644
--- a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
@@ -6,8 +6,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        bc1fl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1fl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1tl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         scd       $15,-8243($sp)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 9-bit signed offset
         sdl       $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         sdr       $11,-20423($12) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips2/invalid-mips4.s b/test/MC/Mips/mips2/invalid-mips4.s
index f14ae8c30654..123235eb00b2 100644
--- a/test/MC/Mips/mips2/invalid-mips4.s
+++ b/test/MC/Mips/mips2/invalid-mips4.s
@@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -50,19 +50,19 @@
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwu       $s3,-24086($v1) # CHECK: :[[@LINE]]:23: error: expected memory with 12-bit signed offset
         movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips5.s b/test/MC/Mips/mips2/invalid-mips5.s
index 8f460c7b2732..ea563372fc15 100644
--- a/test/MC/Mips/mips2/invalid-mips5.s
+++ b/test/MC/Mips/mips2/invalid-mips5.s
@@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -48,19 +48,19 @@
         luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$a0,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$a1     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a1   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -71,3 +71,36 @@
         sdxc1     $f11,$a2($t2)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         suxc1     $f12,$k1($t1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         swxc1     $f19,$t0($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+
diff --git a/test/MC/Mips/mips3/invalid-mips4-wrong-error.s b/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
deleted file mode 100644
index c9af39a46fef..000000000000
--- a/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# Instructions that are invalid and are correctly rejected but use the wrong
-# error message at the moment.
-#
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
-# RUN:     2>%t1
-# RUN: FileCheck %s < %t1
-
-	.set noat
-        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips3/invalid-mips4.s b/test/MC/Mips/mips3/invalid-mips4.s
index 9cd92d39e315..e95ca00d8031 100644
--- a/test/MC/Mips/mips3/invalid-mips4.s
+++ b/test/MC/Mips/mips3/invalid-mips4.s
@@ -5,26 +5,58 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ldxc1     $f8,$s7($15)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sdxc1     $f11,$10($14)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         swxc1     $f19,$12($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
diff --git a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
index cf809d3af2d6..6cfd1625d12d 100644
--- a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
@@ -44,3 +44,4 @@
         pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+
diff --git a/test/MC/Mips/mips3/invalid-mips5.s b/test/MC/Mips/mips3/invalid-mips5.s
index 307eee82075c..f851e1304bae 100644
--- a/test/MC/Mips/mips3/invalid-mips5.s
+++ b/test/MC/Mips/mips3/invalid-mips5.s
@@ -5,28 +5,61 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
         ldxc1     $f8,$s7($t3)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz      $a1,$s6,$a5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.d    $f12,$f29,$a5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sdxc1     $f11,$a6($t2)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         suxc1     $f12,$k1($t1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         swxc1     $f19,$t0($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+
diff --git a/test/MC/Mips/mips32/valid-xfail.s b/test/MC/Mips/mips32/valid-xfail.s
index d680740babf8..5b48896cc00e 100644
--- a/test/MC/Mips/mips32/valid-xfail.s
+++ b/test/MC/Mips/mips32/valid-xfail.s
@@ -7,32 +7,20 @@
 # XFAIL: *
 
         .set noat
-        c.eq.d          $fcc1,$f15,$f15
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
-        c.nge.s         $fcc3,$f11,$f8
-        c.ngl.s         $fcc2,$f31,$f23
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
-        c.seq.s         $fcc7,$f1,$f25
-        c.ueq.d         $fcc4,$f13,$f25
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
-        c.un.s          $fcc1,$f30,$f4
+        c.eq.ps         $fcc5,$f0,$f9
+        c.f.ps          $fcc6,$f11,$f11
+        c.le.ps         $fcc1,$f7,$f20
+        c.lt.ps         $f19,$f5
+        c.nge.ps        $f1,$f26
+        c.ngl.ps        $f21,$f30
+        c.ngle.ps       $fcc7,$f12,$f20
+        c.ngt.ps        $fcc5,$f30,$f6
+        c.ole.ps        $fcc7,$f21,$f8
+        c.olt.ps        $fcc3,$f7,$f16
+        c.seq.ps        $fcc6,$f31,$f14
+        c.sf.ps         $fcc6,$f4,$f6
+        c.ueq.ps        $fcc1,$f5,$f29
+        c.ule.ps        $fcc6,$f17,$f3
+        c.ult.ps        $fcc7,$f14,$f0
+        c.un.ps         $fcc4,$f2,$f26
         rorv            $13,$a3,$s5
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index c24abe31fbd0..4685a1f3696b 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
diff --git a/test/MC/Mips/mips32r2/valid-xfail.s b/test/MC/Mips/mips32r2/valid-xfail.s
index 6fab97f7a62a..acd7802628dd 100644
--- a/test/MC/Mips/mips32r2/valid-xfail.s
+++ b/test/MC/Mips/mips32r2/valid-xfail.s
@@ -12,50 +12,22 @@
         addqh.w         $s7,$s7,$k1
         addqh_r.w       $8,$v1,$zero
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         ceil.l.d        $f1,$f3
         ceil.l.s        $f18,$f13
         cfcmsa          $s6,$19
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 8cde9dc5c80a..3cd6a09fdd8c 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
diff --git a/test/MC/Mips/mips32r3/valid-xfail.s b/test/MC/Mips/mips32r3/valid-xfail.s
index a442beb6198a..d8e760fb8078 100644
--- a/test/MC/Mips/mips32r3/valid-xfail.s
+++ b/test/MC/Mips/mips32r3/valid-xfail.s
@@ -12,50 +12,22 @@
         addqh.w         $s7,$s7,$k1
         addqh_r.w       $8,$v1,$zero
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         ceil.l.d        $f1,$f3
         ceil.l.s        $f18,$f13
         cfcmsa          $s6,$19
diff --git a/test/MC/Mips/mips32r3/valid.s b/test/MC/Mips/mips32r3/valid.s
index 67820bdde99a..b69a78776a45 100644
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
diff --git a/test/MC/Mips/mips32r5/valid-xfail.s b/test/MC/Mips/mips32r5/valid-xfail.s
index 33ea7c46358f..b0c0861465b5 100644
--- a/test/MC/Mips/mips32r5/valid-xfail.s
+++ b/test/MC/Mips/mips32r5/valid-xfail.s
@@ -12,50 +12,22 @@
         addqh.w         $s7,$s7,$k1
         addqh_r.w       $8,$v1,$zero
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         ceil.l.d        $f1,$f3
         ceil.l.s        $f18,$f13
         cfcmsa          $s6,$19
diff --git a/test/MC/Mips/mips32r5/valid.s b/test/MC/Mips/mips32r5/valid.s
index e53b7f3177c1..5ac82e7fe985 100644
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.w.d  $f11,$f25
         ceil.w.s  $f6,$f20
         cfc1      $s1,$21
diff --git a/test/MC/Mips/mips4/valid-xfail.s b/test/MC/Mips/mips4/valid-xfail.s
index 21129bb9dbc4..b55e21083b01 100644
--- a/test/MC/Mips/mips4/valid-xfail.s
+++ b/test/MC/Mips/mips4/valid-xfail.s
@@ -7,31 +7,19 @@
 # XFAIL: *
 
         .set noat
-        c.eq.d          $fcc1,$f15,$f15
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
-        c.nge.s         $fcc3,$f11,$f8
-        c.ngl.s         $fcc2,$f31,$f23
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
-        c.seq.s         $fcc7,$f1,$f25
-        c.ueq.d         $fcc4,$f13,$f25
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
-        c.un.s          $fcc1,$f30,$f4
+        c.eq.ps         $fcc5,$f0,$f9
+        c.f.ps          $fcc6,$f11,$f11
+        c.le.ps         $fcc1,$f7,$f20
+        c.lt.ps         $f19,$f5
+        c.nge.ps        $f1,$f26
+        c.ngl.ps        $f21,$f30
+        c.ngle.ps       $fcc7,$f12,$f20
+        c.ngt.ps        $fcc5,$f30,$f6
+        c.ole.ps        $fcc7,$f21,$f8
+        c.olt.ps        $fcc3,$f7,$f16
+        c.seq.ps        $fcc6,$f31,$f14
+        c.sf.ps         $fcc6,$f4,$f6
+        c.ueq.ps        $fcc1,$f5,$f29
+        c.ule.ps        $fcc6,$f17,$f3
+        c.ult.ps        $fcc7,$f14,$f0
+        c.un.ps         $fcc4,$f2,$f26
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 62cd3905d4ef..69cea599e748 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/Mips/mips5/valid-xfail.s b/test/MC/Mips/mips5/valid-xfail.s
index 8517315780e0..2c6711b47d38 100644
--- a/test/MC/Mips/mips5/valid-xfail.s
+++ b/test/MC/Mips/mips5/valid-xfail.s
@@ -10,50 +10,22 @@
         abs.ps          $f22,$f8
         add.ps          $f25,$f27,$f13
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         cvt.ps.s        $f3,$f18,$f19
         cvt.s.pl        $f30,$f1
         cvt.s.pu        $f14,$f25
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index 77204f996086..85fdfb507aad 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/Mips/mips64/valid-xfail.s b/test/MC/Mips/mips64/valid-xfail.s
index c7513bf5d7e1..9bf899c22703 100644
--- a/test/MC/Mips/mips64/valid-xfail.s
+++ b/test/MC/Mips/mips64/valid-xfail.s
@@ -13,50 +13,22 @@
         alnv.ob         $v31,$v23,$v30,$at
         alnv.ob         $v8,$v17,$v30,$a1
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         cvt.ps.s        $f3,$f18,$f19
         cvt.s.pl        $f30,$f1
         cvt.s.pu        $f14,$f25
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index f34bfafa8fc2..716488df7b50 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/Mips/mips64r2/valid-xfail.s b/test/MC/Mips/mips64r2/valid-xfail.s
index d55d1fb9a196..90cd760712c2 100644
--- a/test/MC/Mips/mips64r2/valid-xfail.s
+++ b/test/MC/Mips/mips64r2/valid-xfail.s
@@ -12,50 +12,22 @@
         addqh.w         $s7,$s7,$k1
         addqh_r.w       $8,$v1,$zero
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         cvt.ps.s        $f3,$f18,$f19
         cvt.s.pl        $f30,$f1
         cvt.s.pu        $f14,$f25
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 500a75771798..656b76c758eb 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/Mips/mips64r3/valid-xfail.s b/test/MC/Mips/mips64r3/valid-xfail.s
index 2f38e73e442b..15c59f3f896a 100644
--- a/test/MC/Mips/mips64r3/valid-xfail.s
+++ b/test/MC/Mips/mips64r3/valid-xfail.s
@@ -15,50 +15,22 @@
         alnv.ob         $v31,$v23,$v30,$at
         alnv.ob         $v8,$v17,$v30,$a1
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         cvt.ps.s        $f3,$f18,$f19
         cvt.s.pl        $f30,$f1
         cvt.s.pu        $f14,$f25
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index 8e2343b0ad16..52d44da8f56f 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/Mips/mips64r5/valid-xfail.s b/test/MC/Mips/mips64r5/valid-xfail.s
index 44ff4ee9fc0b..1ba984bdf905 100644
--- a/test/MC/Mips/mips64r5/valid-xfail.s
+++ b/test/MC/Mips/mips64r5/valid-xfail.s
@@ -15,50 +15,22 @@
         alnv.ob         $v31,$v23,$v30,$at
         alnv.ob         $v8,$v17,$v30,$a1
         alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
         c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
         c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
         c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
         c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
         c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
         c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
         c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
         c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
         c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
         c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
         c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
         c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
         c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
         c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
         c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
         c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
         cvt.ps.s        $f3,$f18,$f19
         cvt.s.pl        $f30,$f1
         cvt.s.pu        $f14,$f25
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index f31374f9fce5..f400436b696f 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -41,10 +41,38 @@ a:
         bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
         bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
         ceil.l.d  $f1,$f3
         ceil.l.s  $f18,$f13
         ceil.w.d  $f11,$f25
diff --git a/test/MC/PowerPC/ppc64-encoding-bookIII.s b/test/MC/PowerPC/ppc64-encoding-bookIII.s
index d157ea0feba6..a9d21b299f52 100644
--- a/test/MC/PowerPC/ppc64-encoding-bookIII.s
+++ b/test/MC/PowerPC/ppc64-encoding-bookIII.s
@@ -197,3 +197,16 @@
 # CHECK-BE: tlbsx 11, 12                    # encoding: [0x7c,0x0b,0x67,0x24]
 # CHECK-LE: tlbsx 11, 12                    # encoding: [0x24,0x67,0x0b,0x7c]
             tlbsx %r11, %r12
+
+# CHECK-BE: mfpmr 5, 400                    # encoding: [0x7c,0xb0,0x62,0x9c]
+# CHECK-LE: mfpmr 5, 400                    # encoding: [0x9c,0x62,0xb0,0x7c]
+            mfpmr 5, 400
+# CHECK-BE: mtpmr 400, 6                    # encoding: [0x7c,0xd0,0x63,0x9c]
+# CHECK-LE: mtpmr 400, 6                    # encoding: [0x9c,0x63,0xd0,0x7c]
+            mtpmr 400, 6
+# CHECK-BE: icblc 0, 0, 8                      # encoding: [0x7c,0x00,0x41,0xcc]
+# CHECK-LE: icblc 0, 0, 8                      # encoding: [0xcc,0x41,0x00,0x7c]
+            icblc 0, 0, 8
+# CHECK-BE: icbtls 0, 0, 9                     # encoding: [0x7c,0x00,0x4b,0xcc]
+# CHECK-LE: icbtls 0, 0, 9                     # encoding: [0xcc,0x4b,0x00,0x7c]
+            icbtls 0, 0, 9
diff --git a/test/Transforms/IPConstantProp/naked-return.ll b/test/Transforms/IPConstantProp/naked-return.ll
new file mode 100644
index 000000000000..f52417fcf7ea
--- /dev/null
+++ b/test/Transforms/IPConstantProp/naked-return.ll
@@ -0,0 +1,28 @@
+; RUN: opt -ipsccp -S %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc19.0.24215"
+
+define i32 @dipsy(i32, i32) local_unnamed_addr #0 {
+BasicBlock0:
+  call void asm "\0D\0Apushl %ebp\0D\0Amovl 8(%esp),%eax\0D\0Amovl 12(%esp), %ebp\0D\0Acalll *%eax\0D\0Apopl %ebp\0D\0Aretl\0D\0A", ""()
+  ret i32 0
+}
+
+define void @tinkywinky(i32, i32, i32) local_unnamed_addr #0 {
+BasicBlock1:
+  call void asm "\0D\0A    movl 12(%esp), %ebp\0D\0A    movl 4(%esp), %eax\0D\0A    movl 8(%esp), %esp\0D\0A    jmpl *%eax\0D\0A", ""()
+  ret void
+}
+
+define void @patatino(i32, i32, i32) local_unnamed_addr #1 {
+bb:
+  %3 = tail call i32 @dipsy(i32 %0, i32 %1) #0
+; Check that we don't accidentally propagate zero.
+; CHECK: @tinkywinky(i32 %3, i32 %2, i32 %1) #0
+  tail call void @tinkywinky(i32 %3, i32 %2, i32 %1) #0
+  ret void
+}
+
+attributes #0 = { naked noinline optnone }
+attributes #1 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
diff --git a/test/Transforms/InstCombine/indexed-gep-compares.ll b/test/Transforms/InstCombine/indexed-gep-compares.ll
index 64dff2712976..71afed438d10 100644
--- a/test/Transforms/InstCombine/indexed-gep-compares.ll
+++ b/test/Transforms/InstCombine/indexed-gep-compares.ll
@@ -188,3 +188,20 @@ bb10:
 
 
 declare i32 @__gxx_personality_v0(...)
+
+define i1 @test8(i64* %in, i64 %offset) {
+entry:
+
+ %ld = load i64, i64* %in, align 8
+ %casti8 = inttoptr i64 %ld to i8*
+ %gepi8 = getelementptr inbounds i8, i8* %casti8, i64 %offset
+ %cast = bitcast i8* %gepi8 to i32**
+ %ptrcast = inttoptr i64 %ld to i32**
+ %gepi32 = getelementptr inbounds i32*, i32** %ptrcast, i64 1
+ %cmp = icmp eq i32** %gepi32, %cast
+ ret i1 %cmp
+
+
+; CHECK-LABEL: @test8(
+; CHECK-NOT: icmp eq i32 %{{[0-9A-Za-z.]+}}, 1
+}
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index cad2899ea35d..49ed897fd2ea 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -219,3 +219,22 @@ entry:
   store %swift.error* %err.res, %swift.error** %err, align 8
   ret void
 }
+
+; Make sure we preseve the type of the store to a swifterror pointer.
+; CHECK-LABEL: @test19(
+; CHECK: [[A:%.*]] = alloca
+; CHECK: call
+; CHECK: [[BC:%.*]] = bitcast i8** [[A]] to
+; CHECK: [[ERRVAL:%.*]] =  load {{.*}}[[BC]]
+; CHECK: store {{.*}}[[ERRVAL]]
+; CHECK: ret
+declare void @initi8(i8**)
+define void @test19(%swift.error** swifterror %err) {
+entry:
+  %tmp = alloca i8*, align 8
+  call void @initi8(i8** %tmp)
+  %swifterror = bitcast i8** %tmp to %swift.error**
+  %err.res = load %swift.error*, %swift.error** %swifterror, align 8
+  store %swift.error* %err.res, %swift.error** %err, align 8
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll
new file mode 100644
index 000000000000..744a36be80e3
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -interleaved-access -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @extract_user_basic(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_multi(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br label %if.merge
+
+if.merge:
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  ret void
+}
+
+define void @extract_user_multi_no_dom(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_wrong_const_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 1
+  ret void
+}
+
+define void @extract_user_undef_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 undef
+  ret void
+}
+
+define void @extract_user_var_index(<8 x i32>* %ptr, i32 %i) {
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 %i
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
new file mode 100644
index 000000000000..2a257d490815
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
@@ -0,0 +1,567 @@
+; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NEON
+; RUN: opt < %s -mattr=-neon -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @load_factor2(<16 x i8>* %ptr) {
+; NEON-LABEL:    @load_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to <8 x i8>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
+
+define void @load_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  ret void
+}
+
+define void @load_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret void
+}
+
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+; NEON-LABEL:    @store_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <16 x i8>* %ptr to <8 x i8>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v8i8.p0v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8>* [[TMP3]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_factor3(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_factor4(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
+  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  %v1 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 1, i32 3>
+  ret void
+}
+
+define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
+  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP9:%.*]] = inttoptr <2 x i64> [[TMP8]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
+  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 2, i32 6>
+  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  ret void
+}
+
+define void @store_ptrvec_factor2(<4 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+; NEON-LABEL:    @store_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = ptrtoint <2 x i32*> %v0 to <2 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <2 x i32*> %v1 to <2 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <4 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+; NEON-LABEL:    @store_ptrvec_factor3(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP6:%.*]] = bitcast <6 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor4(<8 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+; NEON-LABEL:    @store_ptrvec_factor4(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 6, i32 7>
+; NEON-NEXT:       [[TMP7:%.*]] = bitcast <8 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  ret void
+}
+
+define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  ret void
+}
+
+define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  ret void
+}
+
+define void @store_undef_mask_factor2(<8 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; NEON-LABEL:    @store_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_undef_mask_factor3(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_undef_mask_factor4(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
+; NEON-LABEL:    @load_illegal_factor2(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @load_illegal_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
+  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret void
+}
+
+define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind {
+; NEON-LABEL:    @store_illegal_factor2(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_illegal_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 16
+  ret void
+}
+
+define void @store_general_mask_factor4(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefbeg(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefbeg(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefend(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefend(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmid(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmid(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmulti(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmulti(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undefmultimid(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undefmultimid(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undef_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undef_fail(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undeflane(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undeflane(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_negativestart(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_negativestart(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_negativestart(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.aarch64.neon.st2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 3, i32 7, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/AArch64/lit.local.cfg b/test/Transforms/InterleavedAccess/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..304f2436ece1
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+  config.unsupported = True
diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll
new file mode 100644
index 000000000000..2ea925117494
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "arm---eabi"
+
+define void @extract_user_basic(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; CHECK-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP0]], i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_multi(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; CHECK-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP0]], i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br label %if.merge
+
+if.merge:
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  ret void
+}
+
+define void @extract_user_multi_no_dom(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_wrong_const_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 1
+  ret void
+}
+
+define void @extract_user_undef_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 undef
+  ret void
+}
+
+define void @extract_user_var_index(<8 x i32>* %ptr, i32 %i) {
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+;
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 %i
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
new file mode 100644
index 000000000000..21eb8d7a1b0a
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
@@ -0,0 +1,646 @@
+; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s -check-prefix=NEON
+; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "arm---eabi"
+
+define void @load_factor2(<16 x i8>* %ptr) {
+; NEON-LABEL:    @load_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
+
+define void @load_factor3(<6 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <6 x i32>, <6 x i32>* %ptr, align 4
+  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret void
+}
+
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+; NEON-LABEL:    @store_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_factor3(
+; NEON:            [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_factor4(
+; NEON:            [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
+  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  ret void
+}
+
+define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
+  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP9:%.*]] = inttoptr <2 x i32> [[TMP8]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
+  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 2, i32 6>
+  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  ret void
+}
+
+define void @store_ptrvec_factor2(<4 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+; NEON-LABEL:    @store_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = ptrtoint <2 x i32*> %v0 to <2 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <2 x i32*> %v1 to <2 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <4 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+; NEON-LABEL:    @store_ptrvec_factor3(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <6 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor4(<8 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+; NEON-LABEL:    @store_ptrvec_factor4(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <8 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 6, i32 7>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  ret void
+}
+
+define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  ret void
+}
+
+define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  ret void
+}
+
+define void @store_undef_mask_factor2(<8 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; NEON-LABEL:    @store_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_undef_mask_factor3(
+; NEON:            [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_undef_mask_factor4(
+; NEON:            [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_address_space(<4 x i32> addrspace(1)* %ptr) {
+; NEON-LABEL:    @load_address_space(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1i8(i8 addrspace(1)* [[TMP1]], i32 0)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_address_space(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <4 x i32>, <4 x i32> addrspace(1)* %ptr
+  %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @store_address_space(<4 x i32> addrspace(1)* %ptr, <2 x i32> %v0, <2 x i32> %v1) {
+; NEON-LABEL:    @store_address_space(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p1i8.v2i32(i8 addrspace(1)* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i32 0)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_address_space(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32> %interleaved.vec, <4 x i32> addrspace(1)* %ptr
+  ret void
+}
+
+define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
+; NEON-LABEL:    @load_illegal_factor2(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @load_illegal_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
+  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret void
+}
+
+define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind {
+; NEON-LABEL:    @store_illegal_factor2(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_illegal_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 16
+  ret void
+}
+
+define void @store_general_mask_factor4(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefbeg(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefbeg(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefend(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefend(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmid(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmid(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmulti(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmulti(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undefmultimid(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undefmultimid(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undef_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undef_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undeflane(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undeflane(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_endstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_endstart_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_endstart_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_endstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_endstart_pass(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_endstart_pass(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_midstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_midstart_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_midstart_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_midstart_pass(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_midstart_pass(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.arm.neon.vst2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
diff --git a/test/Transforms/InterleavedAccess/ARM/lit.local.cfg b/test/Transforms/InterleavedAccess/ARM/lit.local.cfg
new file mode 100644
index 000000000000..20e19aeb06f9
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+  config.unsupported = True
diff --git a/test/tools/llvm-symbolizer/coff-exports.test b/test/tools/llvm-symbolizer/coff-exports.test
index cad1935a03f9..dde90fb4273d 100644
--- a/test/tools/llvm-symbolizer/coff-exports.test
+++ b/test/tools/llvm-symbolizer/coff-exports.test
@@ -2,8 +2,10 @@ RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
 RUN: 	 | llvm-symbolizer --inlining --relative-address -obj="%p/Inputs/coff-exports.exe" \
 RUN:	 | FileCheck %s
 
-This test relies on UnDecorateSymbolName, which is Windows-only.
+This test relies on UnDecorateSymbolName, which is Win32-only.
 REQUIRES: system-windows
+REQUIRES: target-windows
+FIXME: This test depends on host, not target.
 
 ADDR: 0x500A
 ADDR: 0x5038

From 123063377428540752bad91c7fbd536a762e31bd Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 1 Feb 2017 21:35:00 +0000
Subject: [PATCH 2/4] Vendor import of clang release_40 branch r293807:
 https://llvm.org/svn/llvm-project/cfe/branches/release_40@293807

---
 include/clang/AST/Type.h                      |  14 +-
 include/clang/Basic/DiagnosticSemaKinds.td    |   3 +-
 include/clang/Sema/Overload.h                 |  32 +-
 include/clang/Sema/Sema.h                     |  51 ++-
 lib/AST/ExprConstant.cpp                      |   9 +-
 lib/CodeGen/CodeGenTypes.cpp                  |   2 +-
 lib/Sema/SemaChecking.cpp                     |  34 +-
 lib/Sema/SemaExpr.cpp                         |  19 +-
 lib/Sema/SemaExprCXX.cpp                      |   5 +
 lib/Sema/SemaLookup.cpp                       |   3 +-
 lib/Sema/SemaOverload.cpp                     | 289 ++++++----------
 lib/Sema/SemaTemplateInstantiateDecl.cpp      |   8 +-
 lib/Sema/TreeTransform.h                      |   8 +-
 test/CodeGenCXX/microsoft-abi-default-cc.cpp  |   9 +
 test/CodeGenObjC/block-ptr-type-crash.m       |  28 ++
 test/CodeGenObjC/encode-test.m                |  11 +
 .../Inputs/diagnose-if-warn-system-header.h   |  11 +
 test/Sema/diagnose_if.c                       |   9 +-
 test/SemaCXX/destructor.cpp                   |  20 ++
 test/SemaCXX/diagnose_if.cpp                  | 313 +++++++++++++++---
 20 files changed, 530 insertions(+), 348 deletions(-)
 create mode 100644 test/CodeGenObjC/block-ptr-type-crash.m
 create mode 100644 test/Sema/Inputs/diagnose-if-warn-system-header.h

diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h
index 744a408e5796..a50e054f9b28 100644
--- a/include/clang/AST/Type.h
+++ b/include/clang/AST/Type.h
@@ -3827,13 +3827,13 @@ class AttributedType : public Type, public llvm::FoldingSetNode {
 
   friend class ASTContext; // creates these
 
-  AttributedType(QualType canon, Kind attrKind,
-                 QualType modified, QualType equivalent)
-    : Type(Attributed, canon, canon->isDependentType(),
-           canon->isInstantiationDependentType(),
-           canon->isVariablyModifiedType(),
-           canon->containsUnexpandedParameterPack()),
-      ModifiedType(modified), EquivalentType(equivalent) {
+  AttributedType(QualType canon, Kind attrKind, QualType modified,
+                 QualType equivalent)
+      : Type(Attributed, canon, equivalent->isDependentType(),
+             equivalent->isInstantiationDependentType(),
+             equivalent->isVariablyModifiedType(),
+             equivalent->containsUnexpandedParameterPack()),
+        ModifiedType(modified), EquivalentType(equivalent) {
     AttributedTypeBits.AttrKind = attrKind;
   }
 
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 2d74eecc276e..c9343519e38b 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3373,7 +3373,8 @@ def note_ovl_candidate_has_pass_object_size_params: Note<
     "candidate address cannot be taken because parameter %0 has "
     "pass_object_size attribute">;
 def err_diagnose_if_succeeded : Error<"%0">;
-def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>;
+def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>,
+    ShowInSystemHeader;
 def note_ovl_candidate_disabled_by_function_cond_attr : Note<
     "candidate disabled: %0">;
 def note_ovl_candidate_disabled_by_extension : Note<
diff --git a/include/clang/Sema/Overload.h b/include/clang/Sema/Overload.h
index 88fdc991f394..5220d9873022 100644
--- a/include/clang/Sema/Overload.h
+++ b/include/clang/Sema/Overload.h
@@ -675,26 +675,6 @@ namespace clang {
     /// to be used while performing partial ordering of function templates.
     unsigned ExplicitCallArguments;
 
-    /// The number of diagnose_if attributes that this overload triggered.
-    /// If any of the triggered attributes are errors, this won't count
-    /// diagnose_if warnings.
-    unsigned NumTriggeredDiagnoseIfs = 0;
-
-    /// Basically a TinyPtrVector<DiagnoseIfAttr *> that doesn't own the vector:
-    /// If NumTriggeredDiagnoseIfs is 0 or 1, this is a DiagnoseIfAttr *,
-    /// otherwise it's a pointer to an array of `NumTriggeredDiagnoseIfs`
-    /// DiagnoseIfAttr *s.
-    llvm::PointerUnion<DiagnoseIfAttr *, DiagnoseIfAttr **> DiagnoseIfInfo;
-
-    /// Gets an ArrayRef for the data at DiagnoseIfInfo. Note that this may give
-    /// you a pointer into DiagnoseIfInfo.
-    ArrayRef<DiagnoseIfAttr *> getDiagnoseIfInfo() const {
-      auto *Ptr = NumTriggeredDiagnoseIfs <= 1
-                      ? DiagnoseIfInfo.getAddrOfPtr1()
-                      : DiagnoseIfInfo.get<DiagnoseIfAttr **>();
-      return {Ptr, NumTriggeredDiagnoseIfs};
-    }
-
     union {
       DeductionFailureInfo DeductionFailure;
       
@@ -759,9 +739,8 @@ namespace clang {
     SmallVector<OverloadCandidate, 16> Candidates;
     llvm::SmallPtrSet<Decl *, 16> Functions;
 
-    // Allocator for ConversionSequenceLists and DiagnoseIfAttr* arrays.
-    // We store the first few of each of these inline to avoid allocation for
-    // small sets.
+    // Allocator for ConversionSequenceLists. We store the first few of these
+    // inline to avoid allocation for small sets.
     llvm::BumpPtrAllocator SlabAllocator;
 
     SourceLocation Loc;
@@ -776,6 +755,8 @@ namespace clang {
     /// from the slab allocator.
     /// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
     /// instead.
+    /// FIXME: Now that this only allocates ImplicitConversionSequences, do we
+    /// want to un-generalize this?
     template <typename T>
     T *slabAllocate(unsigned N) {
       // It's simpler if this doesn't need to consider alignment.
@@ -809,11 +790,6 @@ namespace clang {
     SourceLocation getLocation() const { return Loc; }
     CandidateSetKind getKind() const { return Kind; }
 
-    /// Make a DiagnoseIfAttr* array in a block of memory that will live for
-    /// as long as this OverloadCandidateSet. Returns a pointer to the start
-    /// of that array.
-    DiagnoseIfAttr **addDiagnoseIfComplaints(ArrayRef<DiagnoseIfAttr *> CA);
-
     /// \brief Determine when this overload candidate will be new to the
     /// overload set.
     bool isNewCandidate(Decl *F) {
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index c180a8ea3ee1..63d078498fe4 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -2532,14 +2532,14 @@ class Sema {
   void AddMethodCandidate(DeclAccessPair FoundDecl,
                           QualType ObjectType,
                           Expr::Classification ObjectClassification,
-                          Expr *ThisArg, ArrayRef<Expr *> Args,
+                          ArrayRef<Expr *> Args,
                           OverloadCandidateSet& CandidateSet,
                           bool SuppressUserConversion = false);
   void AddMethodCandidate(CXXMethodDecl *Method,
                           DeclAccessPair FoundDecl,
                           CXXRecordDecl *ActingContext, QualType ObjectType,
                           Expr::Classification ObjectClassification,
-                          Expr *ThisArg, ArrayRef<Expr *> Args,
+                          ArrayRef<Expr *> Args,
                           OverloadCandidateSet& CandidateSet,
                           bool SuppressUserConversions = false,
                           bool PartialOverloading = false,
@@ -2550,7 +2550,6 @@ class Sema {
                                  TemplateArgumentListInfo *ExplicitTemplateArgs,
                                   QualType ObjectType,
                                   Expr::Classification ObjectClassification,
-                                  Expr *ThisArg,
                                   ArrayRef<Expr *> Args,
                                   OverloadCandidateSet& CandidateSet,
                                   bool SuppressUserConversions = false,
@@ -2624,37 +2623,27 @@ class Sema {
   EnableIfAttr *CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
                               bool MissingImplicitThis = false);
 
-  /// Check the diagnose_if attributes on the given function. Returns the
-  /// first succesful fatal attribute, or null if calling Function(Args) isn't
-  /// an error.
+  /// Emit diagnostics for the diagnose_if attributes on Function, ignoring any
+  /// non-ArgDependent DiagnoseIfAttrs.
   ///
-  /// This only considers ArgDependent DiagnoseIfAttrs.
+  /// Argument-dependent diagnose_if attributes should be checked each time a
+  /// function is used as a direct callee of a function call.
   ///
-  /// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
-  /// succeed. If this function returns non-null, the contents of Nonfatal are
-  /// unspecified.
-  DiagnoseIfAttr *
-  checkArgDependentDiagnoseIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
-                              SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
-                              bool MissingImplicitThis = false,
-                              Expr *ThisArg = nullptr);
+  /// Returns true if any errors were emitted.
+  bool diagnoseArgDependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                           const Expr *ThisArg,
+                                           ArrayRef<const Expr *> Args,
+                                           SourceLocation Loc);
 
-  /// Check the diagnose_if expressions on the given function. Returns the
-  /// first succesful fatal attribute, or null if using Function isn't
-  /// an error.
+  /// Emit diagnostics for the diagnose_if attributes on Function, ignoring any
+  /// ArgDependent DiagnoseIfAttrs.
   ///
-  /// This ignores all ArgDependent DiagnoseIfAttrs.
+  /// Argument-independent diagnose_if attributes should be checked on every use
+  /// of a function.
   ///
-  /// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
-  /// succeed. If this function returns non-null, the contents of Nonfatal are
-  /// unspecified.
-  DiagnoseIfAttr *
-  checkArgIndependentDiagnoseIf(FunctionDecl *Function,
-                                SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal);
-
-  /// Emits the diagnostic contained in the given DiagnoseIfAttr at Loc. Also
-  /// emits a note about the location of said attribute.
-  void emitDiagnoseIfDiagnostic(SourceLocation Loc, const DiagnoseIfAttr *DIA);
+  /// Returns true if any errors were emitted.
+  bool diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                             SourceLocation Loc);
 
   /// Returns whether the given function's address can be taken or not,
   /// optionally emitting a diagnostic if the address can't be taken.
@@ -9914,8 +9903,8 @@ class Sema {
                             SourceLocation Loc);
 
   void checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
-                 ArrayRef<const Expr *> Args, bool IsMemberFunction,
-                 SourceLocation Loc, SourceRange Range,
+                 const Expr *ThisArg, ArrayRef<const Expr *> Args,
+                 bool IsMemberFunction, SourceLocation Loc, SourceRange Range,
                  VariadicCallType CallType);
 
   bool CheckObjCString(Expr *Arg);
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 5fab58a5af95..6a6baf96ad37 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -2362,7 +2362,14 @@ static unsigned getBaseIndex(const CXXRecordDecl *Derived,
 /// Extract the value of a character from a string literal.
 static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit,
                                             uint64_t Index) {
-  // FIXME: Support ObjCEncodeExpr, MakeStringConstant
+  // FIXME: Support MakeStringConstant
+  if (const auto *ObjCEnc = dyn_cast<ObjCEncodeExpr>(Lit)) {
+    std::string Str;
+    Info.Ctx.getObjCEncodingForType(ObjCEnc->getEncodedType(), Str);
+    assert(Index <= Str.size() && "Index too large");
+    return APSInt::getUnsigned(Str.c_str()[Index]);
+  }
+
   if (auto PE = dyn_cast<PredefinedExpr>(Lit))
     Lit = PE->getFunctionName();
   const StringLiteral *S = cast<StringLiteral>(Lit);
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index b95b4fff5734..adb40c8c0d47 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -737,7 +737,7 @@ CodeGenTypes::getCGRecordLayout(const RecordDecl *RD) {
 }
 
 bool CodeGenTypes::isPointerZeroInitializable(QualType T) {
-  assert (T->isAnyPointerType() && "Invalid type");
+  assert((T->isAnyPointerType() || T->isBlockPointerType()) && "Invalid type");
   return isZeroInitializable(T);
 }
 
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 49208e20a49d..3aedb2a8c9bb 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -2426,11 +2426,12 @@ static void CheckNonNullArguments(Sema &S,
 }
 
 /// Handles the checks for format strings, non-POD arguments to vararg
-/// functions, and NULL arguments passed to non-NULL parameters.
+/// functions, NULL arguments passed to non-NULL parameters, and diagnose_if
+/// attributes.
 void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
-                     ArrayRef<const Expr *> Args, bool IsMemberFunction,
-                     SourceLocation Loc, SourceRange Range,
-                     VariadicCallType CallType) {
+                     const Expr *ThisArg, ArrayRef<const Expr *> Args,
+                     bool IsMemberFunction, SourceLocation Loc,
+                     SourceRange Range, VariadicCallType CallType) {
   // FIXME: We should check as much as we can in the template definition.
   if (CurContext->isDependentContext())
     return;
@@ -2477,6 +2478,9 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
         CheckArgumentWithTypeTag(I, Args.data());
     }
   }
+
+  if (FD)
+    diagnoseArgDependentDiagnoseIfAttrs(FD, ThisArg, Args, Loc);
 }
 
 /// CheckConstructorCall - Check a constructor call for correctness and safety
@@ -2487,8 +2491,8 @@ void Sema::CheckConstructorCall(FunctionDecl *FDecl,
                                 SourceLocation Loc) {
   VariadicCallType CallType =
     Proto->isVariadic() ? VariadicConstructor : VariadicDoesNotApply;
-  checkCall(FDecl, Proto, Args, /*IsMemberFunction=*/true, Loc, SourceRange(), 
-            CallType);
+  checkCall(FDecl, Proto, /*ThisArg=*/nullptr, Args, /*IsMemberFunction=*/true,
+            Loc, SourceRange(), CallType);
 }
 
 /// CheckFunctionCall - Check a direct function call for various correctness
@@ -2503,14 +2507,20 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
                                                   TheCall->getCallee());
   Expr** Args = TheCall->getArgs();
   unsigned NumArgs = TheCall->getNumArgs();
+
+  Expr *ImplicitThis = nullptr;
   if (IsMemberOperatorCall) {
     // If this is a call to a member operator, hide the first argument
     // from checkCall.
     // FIXME: Our choice of AST representation here is less than ideal.
+    ImplicitThis = Args[0];
     ++Args;
     --NumArgs;
-  }
-  checkCall(FDecl, Proto, llvm::makeArrayRef(Args, NumArgs), 
+  } else if (IsMemberFunction)
+    ImplicitThis =
+        cast<CXXMemberCallExpr>(TheCall)->getImplicitObjectArgument();
+
+  checkCall(FDecl, Proto, ImplicitThis, llvm::makeArrayRef(Args, NumArgs),
             IsMemberFunction, TheCall->getRParenLoc(),
             TheCall->getCallee()->getSourceRange(), CallType);
 
@@ -2546,8 +2556,8 @@ bool Sema::CheckObjCMethodCall(ObjCMethodDecl *Method, SourceLocation lbrac,
   VariadicCallType CallType =
       Method->isVariadic() ? VariadicMethod : VariadicDoesNotApply;
 
-  checkCall(Method, nullptr, Args,
-            /*IsMemberFunction=*/false, lbrac, Method->getSourceRange(), 
+  checkCall(Method, nullptr, /*ThisArg=*/nullptr, Args,
+            /*IsMemberFunction=*/false, lbrac, Method->getSourceRange(),
             CallType);
 
   return false;
@@ -2576,7 +2586,7 @@ bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
     CallType = VariadicFunction;
   }
 
-  checkCall(NDecl, Proto,
+  checkCall(NDecl, Proto, /*ThisArg=*/nullptr,
             llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()),
             /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
             TheCall->getCallee()->getSourceRange(), CallType);
@@ -2589,7 +2599,7 @@ bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
 bool Sema::CheckOtherCall(CallExpr *TheCall, const FunctionProtoType *Proto) {
   VariadicCallType CallType = getVariadicCallType(/*FDecl=*/nullptr, Proto,
                                                   TheCall->getCallee());
-  checkCall(/*FDecl=*/nullptr, Proto,
+  checkCall(/*FDecl=*/nullptr, Proto, /*ThisArg=*/nullptr,
             llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()),
             /*IsMemberFunction=*/false, TheCall->getRParenLoc(),
             TheCall->getCallee()->getSourceRange(), CallType);
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index e1e1f0283629..0077d6c539c3 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -342,7 +342,6 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
   }
 
   // See if this is a deleted function.
-  SmallVector<DiagnoseIfAttr *, 4> DiagnoseIfWarnings;
   if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     if (FD->isDeleted()) {
       auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
@@ -365,11 +364,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
     if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD))
       return true;
 
-    if (const DiagnoseIfAttr *A =
-            checkArgIndependentDiagnoseIf(FD, DiagnoseIfWarnings)) {
-      emitDiagnoseIfDiagnostic(Loc, A);
+    if (diagnoseArgIndependentDiagnoseIfAttrs(FD, Loc))
       return true;
-    }
   }
 
   // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions
@@ -385,9 +381,6 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
     return true;
   }
 
-  for (const auto *W : DiagnoseIfWarnings)
-    emitDiagnoseIfDiagnostic(Loc, W);
-
   DiagnoseAvailabilityOfDecl(*this, D, Loc, UnknownObjCClass,
                              ObjCPropertyAccess);
 
@@ -5189,16 +5182,6 @@ static void checkDirectCallValidity(Sema &S, const Expr *Fn,
         << Attr->getCond()->getSourceRange() << Attr->getMessage();
     return;
   }
-
-  SmallVector<DiagnoseIfAttr *, 4> Nonfatal;
-  if (const DiagnoseIfAttr *Attr = S.checkArgDependentDiagnoseIf(
-          Callee, ArgExprs, Nonfatal, /*MissingImplicitThis=*/true)) {
-    S.emitDiagnoseIfDiagnostic(Fn->getLocStart(), Attr);
-    return;
-  }
-
-  for (const auto *W : Nonfatal)
-    S.emitDiagnoseIfDiagnostic(Fn->getLocStart(), W);
 }
 
 /// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index b2fb33f53432..3de677e37ba2 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -6712,6 +6712,11 @@ ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
   CXXMemberCallExpr *CE =
     new (Context) CXXMemberCallExpr(Context, ME, None, ResultType, VK,
                                     Exp.get()->getLocEnd());
+
+  if (CheckFunctionCall(Method, CE,
+                        Method->getType()->castAs<FunctionProtoType>()))
+    return ExprError();
+
   return CE;
 }
 
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index 883e2ae264e9..38a7b8c127cc 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -2960,7 +2960,6 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD,
     if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand->getUnderlyingDecl())) {
       if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
         AddMethodCandidate(M, Cand, RD, ThisTy, Classification,
-                           /*ThisArg=*/nullptr,
                            llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
       else if (CtorInfo)
         AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl,
@@ -2973,7 +2972,7 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD,
       if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
         AddMethodTemplateCandidate(
             Tmpl, Cand, RD, nullptr, ThisTy, Classification,
-            /*ThisArg=*/nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
+            llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
       else if (CtorInfo)
         AddTemplateOverloadCandidate(
             CtorInfo.ConstructorTmpl, CtorInfo.FoundDecl, nullptr,
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index c66fe7678d8c..f976b76727f5 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -839,20 +839,12 @@ void OverloadCandidateSet::destroyCandidates() {
 
 void OverloadCandidateSet::clear() {
   destroyCandidates();
-  // DiagnoseIfAttrs are just pointers, so we don't need to destroy them.
   SlabAllocator.Reset();
   NumInlineBytesUsed = 0;
   Candidates.clear();
   Functions.clear();
 }
 
-DiagnoseIfAttr **
-OverloadCandidateSet::addDiagnoseIfComplaints(ArrayRef<DiagnoseIfAttr *> CA) {
-  auto *DIA = slabAllocate<DiagnoseIfAttr *>(CA.size());
-  std::uninitialized_copy(CA.begin(), CA.end(), DIA);
-  return DIA;
-}
-
 namespace {
   class UnbridgedCastsSet {
     struct Entry {
@@ -5831,28 +5823,6 @@ static bool IsAcceptableNonMemberOperatorCandidate(ASTContext &Context,
   return false;
 }
 
-static void initDiagnoseIfComplaint(Sema &S, OverloadCandidateSet &CandidateSet,
-                                    OverloadCandidate &Candidate,
-                                    FunctionDecl *Function,
-                                    ArrayRef<Expr *> Args,
-                                    bool MissingImplicitThis = false,
-                                    Expr *ExplicitThis = nullptr) {
-  SmallVector<DiagnoseIfAttr *, 8> Results;
-  if (DiagnoseIfAttr *DIA = S.checkArgDependentDiagnoseIf(
-          Function, Args, Results, MissingImplicitThis, ExplicitThis)) {
-    Results.clear();
-    Results.push_back(DIA);
-  }
-
-  Candidate.NumTriggeredDiagnoseIfs = Results.size();
-  if (Results.empty())
-    Candidate.DiagnoseIfInfo = nullptr;
-  else if (Results.size() == 1)
-    Candidate.DiagnoseIfInfo = Results[0];
-  else
-    Candidate.DiagnoseIfInfo = CandidateSet.addDiagnoseIfComplaints(Results);
-}
-
 /// AddOverloadCandidate - Adds the given function to the set of
 /// candidate functions, using the given function call arguments.  If
 /// @p SuppressUserConversions, then don't allow user-defined
@@ -5886,10 +5856,9 @@ Sema::AddOverloadCandidate(FunctionDecl *Function,
       // object argument (C++ [over.call.func]p3), and the acting context
       // is irrelevant.
       AddMethodCandidate(Method, FoundDecl, Method->getParent(), QualType(),
-                         Expr::Classification::makeSimpleLValue(),
-                         /*ThisArg=*/nullptr, Args, CandidateSet,
-                         SuppressUserConversions, PartialOverloading,
-                         EarlyConversions);
+                         Expr::Classification::makeSimpleLValue(), Args,
+                         CandidateSet, SuppressUserConversions,
+                         PartialOverloading, EarlyConversions);
       return;
     }
     // We treat a constructor like a non-member function, since its object
@@ -6050,8 +6019,6 @@ Sema::AddOverloadCandidate(FunctionDecl *Function,
     Candidate.FailureKind = ovl_fail_ext_disabled;
     return;
   }
-
-  initDiagnoseIfComplaint(*this, CandidateSet, Candidate, Function, Args);
 }
 
 ObjCMethodDecl *
@@ -6260,85 +6227,73 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
   return nullptr;
 }
 
-static bool gatherDiagnoseIfAttrs(FunctionDecl *Function, bool ArgDependent,
-                                  SmallVectorImpl<DiagnoseIfAttr *> &Errors,
-                                  SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal) {
-  for (auto *DIA : Function->specific_attrs<DiagnoseIfAttr>())
-    if (ArgDependent == DIA->getArgDependent()) {
-      if (DIA->isError())
-        Errors.push_back(DIA);
-      else
-        Nonfatal.push_back(DIA);
-    }
-
-  return !Errors.empty() || !Nonfatal.empty();
-}
-
 template <typename CheckFn>
-static DiagnoseIfAttr *
-checkDiagnoseIfAttrsWith(const SmallVectorImpl<DiagnoseIfAttr *> &Errors,
-                         SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
-                         CheckFn &&IsSuccessful) {
+static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const FunctionDecl *FD,
+                                        bool ArgDependent, SourceLocation Loc,
+                                        CheckFn &&IsSuccessful) {
+  SmallVector<const DiagnoseIfAttr *, 8> Attrs;
+  for (const auto *DIA : FD->specific_attrs<DiagnoseIfAttr>()) {
+    if (ArgDependent == DIA->getArgDependent())
+      Attrs.push_back(DIA);
+  }
+
+  // Common case: No diagnose_if attributes, so we can quit early.
+  if (Attrs.empty())
+    return false;
+
+  auto WarningBegin = std::stable_partition(
+      Attrs.begin(), Attrs.end(),
+      [](const DiagnoseIfAttr *DIA) { return DIA->isError(); });
+
   // Note that diagnose_if attributes are late-parsed, so they appear in the
   // correct order (unlike enable_if attributes).
-  auto ErrAttr = llvm::find_if(Errors, IsSuccessful);
-  if (ErrAttr != Errors.end())
-    return *ErrAttr;
+  auto ErrAttr = llvm::find_if(llvm::make_range(Attrs.begin(), WarningBegin),
+                               IsSuccessful);
+  if (ErrAttr != WarningBegin) {
+    const DiagnoseIfAttr *DIA = *ErrAttr;
+    S.Diag(Loc, diag::err_diagnose_if_succeeded) << DIA->getMessage();
+    S.Diag(DIA->getLocation(), diag::note_from_diagnose_if)
+        << DIA->getParent() << DIA->getCond()->getSourceRange();
+    return true;
+  }
 
-  llvm::erase_if(Nonfatal, [&](DiagnoseIfAttr *A) { return !IsSuccessful(A); });
-  return nullptr;
+  for (const auto *DIA : llvm::make_range(WarningBegin, Attrs.end()))
+    if (IsSuccessful(DIA)) {
+      S.Diag(Loc, diag::warn_diagnose_if_succeeded) << DIA->getMessage();
+      S.Diag(DIA->getLocation(), diag::note_from_diagnose_if)
+          << DIA->getParent() << DIA->getCond()->getSourceRange();
+    }
+
+  return false;
 }
 
-DiagnoseIfAttr *
-Sema::checkArgDependentDiagnoseIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
-                                  SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
-                                  bool MissingImplicitThis,
-                                  Expr *ThisArg) {
-  SmallVector<DiagnoseIfAttr *, 4> Errors;
-  if (!gatherDiagnoseIfAttrs(Function, /*ArgDependent=*/true, Errors, Nonfatal))
-    return nullptr;
-
-  SFINAETrap Trap(*this);
-  SmallVector<Expr *, 16> ConvertedArgs;
-  Expr *ConvertedThis;
-  if (!convertArgsForAvailabilityChecks(*this, Function, ThisArg, Args, Trap,
-                                        MissingImplicitThis, ConvertedThis,
-                                        ConvertedArgs))
-    return nullptr;
-
-  return checkDiagnoseIfAttrsWith(Errors, Nonfatal, [&](DiagnoseIfAttr *DIA) {
-    APValue Result;
-    // It's sane to use the same ConvertedArgs for any redecl of this function,
-    // since EvaluateWithSubstitution only cares about the position of each
-    // argument in the arg list, not the ParmVarDecl* it maps to.
-    if (!DIA->getCond()->EvaluateWithSubstitution(
-            Result, Context, DIA->getParent(), ConvertedArgs, ConvertedThis))
-      return false;
-    return Result.isInt() && Result.getInt().getBoolValue();
-  });
+bool Sema::diagnoseArgDependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                               const Expr *ThisArg,
+                                               ArrayRef<const Expr *> Args,
+                                               SourceLocation Loc) {
+  return diagnoseDiagnoseIfAttrsWith(
+      *this, Function, /*ArgDependent=*/true, Loc,
+      [&](const DiagnoseIfAttr *DIA) {
+        APValue Result;
+        // It's sane to use the same Args for any redecl of this function, since
+        // EvaluateWithSubstitution only cares about the position of each
+        // argument in the arg list, not the ParmVarDecl* it maps to.
+        if (!DIA->getCond()->EvaluateWithSubstitution(
+                Result, Context, DIA->getParent(), Args, ThisArg))
+          return false;
+        return Result.isInt() && Result.getInt().getBoolValue();
+      });
 }
 
-DiagnoseIfAttr *Sema::checkArgIndependentDiagnoseIf(
-    FunctionDecl *Function, SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal) {
-  SmallVector<DiagnoseIfAttr *, 4> Errors;
-  if (!gatherDiagnoseIfAttrs(Function, /*ArgDependent=*/false, Errors,
-                             Nonfatal))
-    return nullptr;
-
-  return checkDiagnoseIfAttrsWith(Errors, Nonfatal, [&](DiagnoseIfAttr *DIA) {
-    bool Result;
-    return DIA->getCond()->EvaluateAsBooleanCondition(Result, Context) &&
-           Result;
-  });
-}
-
-void Sema::emitDiagnoseIfDiagnostic(SourceLocation Loc,
-                                    const DiagnoseIfAttr *DIA) {
-  auto Code = DIA->isError() ? diag::err_diagnose_if_succeeded
-                             : diag::warn_diagnose_if_succeeded;
-  Diag(Loc, Code) << DIA->getMessage();
-  Diag(DIA->getLocation(), diag::note_from_diagnose_if)
-      << DIA->getParent() << DIA->getCond()->getSourceRange();
+bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                                 SourceLocation Loc) {
+  return diagnoseDiagnoseIfAttrsWith(
+      *this, Function, /*ArgDependent=*/false, Loc,
+      [&](const DiagnoseIfAttr *DIA) {
+        bool Result;
+        return DIA->getCond()->EvaluateAsBooleanCondition(Result, Context) &&
+               Result;
+      });
 }
 
 /// \brief Add all of the function declarations in the given function set to
@@ -6356,8 +6311,8 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
         AddMethodCandidate(cast<CXXMethodDecl>(FD), F.getPair(),
                            cast<CXXMethodDecl>(FD)->getParent(),
                            Args[0]->getType(), Args[0]->Classify(Context),
-                           Args[0], Args.slice(1), CandidateSet,
-                           SuppressUserConversions, PartialOverloading);
+                           Args.slice(1), CandidateSet, SuppressUserConversions,
+                           PartialOverloading);
       else
         AddOverloadCandidate(FD, F.getPair(), Args, CandidateSet,
                              SuppressUserConversions, PartialOverloading);
@@ -6369,7 +6324,7 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
             FunTmpl, F.getPair(),
             cast<CXXRecordDecl>(FunTmpl->getDeclContext()),
             ExplicitTemplateArgs, Args[0]->getType(),
-            Args[0]->Classify(Context), Args[0], Args.slice(1), CandidateSet,
+            Args[0]->Classify(Context), Args.slice(1), CandidateSet,
             SuppressUserConversions, PartialOverloading);
       else
         AddTemplateOverloadCandidate(FunTmpl, F.getPair(),
@@ -6385,7 +6340,6 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
 void Sema::AddMethodCandidate(DeclAccessPair FoundDecl,
                               QualType ObjectType,
                               Expr::Classification ObjectClassification,
-                              Expr *ThisArg,
                               ArrayRef<Expr *> Args,
                               OverloadCandidateSet& CandidateSet,
                               bool SuppressUserConversions) {
@@ -6399,15 +6353,13 @@ void Sema::AddMethodCandidate(DeclAccessPair FoundDecl,
     assert(isa<CXXMethodDecl>(TD->getTemplatedDecl()) &&
            "Expected a member function template");
     AddMethodTemplateCandidate(TD, FoundDecl, ActingContext,
-                               /*ExplicitArgs*/ nullptr,
-                               ObjectType, ObjectClassification,
-                               ThisArg, Args, CandidateSet,
+                               /*ExplicitArgs*/ nullptr, ObjectType,
+                               ObjectClassification, Args, CandidateSet,
                                SuppressUserConversions);
   } else {
     AddMethodCandidate(cast<CXXMethodDecl>(Decl), FoundDecl, ActingContext,
-                       ObjectType, ObjectClassification,
-                       ThisArg, Args,
-                       CandidateSet, SuppressUserConversions);
+                       ObjectType, ObjectClassification, Args, CandidateSet,
+                       SuppressUserConversions);
   }
 }
 
@@ -6422,7 +6374,7 @@ void
 Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
                          CXXRecordDecl *ActingContext, QualType ObjectType,
                          Expr::Classification ObjectClassification,
-                         Expr *ThisArg, ArrayRef<Expr *> Args,
+                         ArrayRef<Expr *> Args,
                          OverloadCandidateSet &CandidateSet,
                          bool SuppressUserConversions,
                          bool PartialOverloading,
@@ -6544,9 +6496,6 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
     Candidate.DeductionFailure.Data = FailedAttr;
     return;
   }
-
-  initDiagnoseIfComplaint(*this, CandidateSet, Candidate, Method, Args,
-                          /*MissingImplicitThis=*/!ThisArg, ThisArg);
 }
 
 /// \brief Add a C++ member function template as a candidate to the candidate
@@ -6559,7 +6508,6 @@ Sema::AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
                                  TemplateArgumentListInfo *ExplicitTemplateArgs,
                                  QualType ObjectType,
                                  Expr::Classification ObjectClassification,
-                                 Expr *ThisArg,
                                  ArrayRef<Expr *> Args,
                                  OverloadCandidateSet& CandidateSet,
                                  bool SuppressUserConversions,
@@ -6613,9 +6561,9 @@ Sema::AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
   assert(isa<CXXMethodDecl>(Specialization) &&
          "Specialization is not a member function?");
   AddMethodCandidate(cast<CXXMethodDecl>(Specialization), FoundDecl,
-                     ActingContext, ObjectType, ObjectClassification,
-                     /*ThisArg=*/ThisArg, Args, CandidateSet,
-                     SuppressUserConversions, PartialOverloading, Conversions);
+                     ActingContext, ObjectType, ObjectClassification, Args,
+                     CandidateSet, SuppressUserConversions, PartialOverloading,
+                     Conversions);
 }
 
 /// \brief Add a C++ function template specialization as a candidate
@@ -6942,8 +6890,6 @@ Sema::AddConversionCandidate(CXXConversionDecl *Conversion,
     Candidate.DeductionFailure.Data = FailedAttr;
     return;
   }
-
-  initDiagnoseIfComplaint(*this, CandidateSet, Candidate, Conversion, None, false, From);
 }
 
 /// \brief Adds a conversion function template specialization
@@ -7096,8 +7042,6 @@ void Sema::AddSurrogateCandidate(CXXConversionDecl *Conversion,
     Candidate.DeductionFailure.Data = FailedAttr;
     return;
   }
-
-  initDiagnoseIfComplaint(*this, CandidateSet, Candidate, Conversion, None);
 }
 
 /// \brief Add overload candidates for overloaded operators that are
@@ -7146,7 +7090,7 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op,
          Oper != OperEnd;
          ++Oper)
       AddMethodCandidate(Oper.getPair(), Args[0]->getType(),
-                         Args[0]->Classify(Context), Args[0], Args.slice(1),
+                         Args[0]->Classify(Context), Args.slice(1),
                          CandidateSet, /*SuppressUserConversions=*/false);
   }
 }
@@ -9178,17 +9122,6 @@ void Sema::diagnoseEquivalentInternalLinkageDeclarations(
   }
 }
 
-static bool isCandidateUnavailableDueToDiagnoseIf(const OverloadCandidate &OC) {
-  ArrayRef<DiagnoseIfAttr *> Info = OC.getDiagnoseIfInfo();
-  if (!Info.empty() && Info[0]->isError())
-    return true;
-
-  assert(llvm::all_of(Info,
-                      [](const DiagnoseIfAttr *A) { return !A->isError(); }) &&
-         "DiagnoseIf info shouldn't have mixed warnings and errors.");
-  return false;
-}
-
 /// \brief Computes the best viable function (C++ 13.3.3)
 /// within an overload candidate set.
 ///
@@ -9267,19 +9200,13 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
   // Best is the best viable function.
   if (Best->Function &&
       (Best->Function->isDeleted() ||
-       S.isFunctionConsideredUnavailable(Best->Function) ||
-       isCandidateUnavailableDueToDiagnoseIf(*Best)))
+       S.isFunctionConsideredUnavailable(Best->Function)))
     return OR_Deleted;
 
   if (!EquivalentCands.empty())
     S.diagnoseEquivalentInternalLinkageDeclarations(Loc, Best->Function,
                                                     EquivalentCands);
 
-  for (const auto *W : Best->getDiagnoseIfInfo()) {
-    assert(W->isWarning() && "Errors should've been caught earlier!");
-    S.emitDiagnoseIfDiagnostic(Loc, W);
-  }
-
   return OR_Success;
 }
 
@@ -10162,14 +10089,6 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
       MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
       return;
     }
-    if (isCandidateUnavailableDueToDiagnoseIf(*Cand)) {
-      auto *A = Cand->DiagnoseIfInfo.get<DiagnoseIfAttr *>();
-      assert(A->isError() && "Non-error diagnose_if disables a candidate?");
-      S.Diag(Cand->Function->getLocation(),
-             diag::note_ovl_candidate_disabled_by_function_cond_attr)
-          << A->getCond()->getSourceRange() << A->getMessage();
-      return;
-    }
 
     // We don't really have anything else to say about viable candidates.
     S.NoteOverloadCandidate(Cand->FoundDecl, Fn);
@@ -12113,6 +12032,10 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
       if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
         return ExprError();
 
+      if (CheckFunctionCall(FnDecl, TheCall,
+                            FnDecl->getType()->castAs<FunctionProtoType>()))
+        return ExprError();
+
       return MaybeBindToTemporary(TheCall);
     } else {
       // We matched a built-in operator. Convert the arguments, then
@@ -12343,16 +12266,20 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
           return ExprError();
 
         ArrayRef<const Expr *> ArgsArray(Args, 2);
+        const Expr *ImplicitThis = nullptr;
         // Cut off the implicit 'this'.
-        if (isa<CXXMethodDecl>(FnDecl))
+        if (isa<CXXMethodDecl>(FnDecl)) {
+          ImplicitThis = ArgsArray[0];
           ArgsArray = ArgsArray.slice(1);
+        }
 
         // Check for a self move.
         if (Op == OO_Equal)
           DiagnoseSelfMove(Args[0], Args[1], OpLoc);
 
-        checkCall(FnDecl, nullptr, ArgsArray, isa<CXXMethodDecl>(FnDecl), OpLoc, 
-                  TheCall->getSourceRange(), VariadicDoesNotApply);
+        checkCall(FnDecl, nullptr, ImplicitThis, ArgsArray,
+                  isa<CXXMethodDecl>(FnDecl), OpLoc, TheCall->getSourceRange(),
+                  VariadicDoesNotApply);
 
         return MaybeBindToTemporary(TheCall);
       } else {
@@ -12561,6 +12488,10 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
         if (CheckCallReturnType(FnDecl->getReturnType(), LLoc, TheCall, FnDecl))
           return ExprError();
 
+        if (CheckFunctionCall(Method, TheCall,
+                              Method->getType()->castAs<FunctionProtoType>()))
+          return ExprError();
+
         return MaybeBindToTemporary(TheCall);
       } else {
         // We matched a built-in operator. Convert the arguments, then
@@ -12727,16 +12658,6 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
       TemplateArgs = &TemplateArgsBuffer;
     }
 
-    // Poor-programmer's Lazy<Expr *>; isImplicitAccess requires stripping
-    // parens/casts, which would be nice to avoid potentially doing multiple
-    // times.
-    llvm::Optional<Expr *> UnresolvedBase;
-    auto GetUnresolvedBase = [&] {
-      if (!UnresolvedBase.hasValue())
-        UnresolvedBase =
-          UnresExpr->isImplicitAccess() ? nullptr : UnresExpr->getBase();
-      return *UnresolvedBase;
-    };
     for (UnresolvedMemberExpr::decls_iterator I = UnresExpr->decls_begin(),
            E = UnresExpr->decls_end(); I != E; ++I) {
 
@@ -12757,14 +12678,12 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
           continue;
 
         AddMethodCandidate(Method, I.getPair(), ActingDC, ObjectType,
-                           ObjectClassification,
-                           /*ThisArg=*/GetUnresolvedBase(), Args, CandidateSet,
+                           ObjectClassification, Args, CandidateSet,
                            /*SuppressUserConversions=*/false);
       } else {
         AddMethodTemplateCandidate(
             cast<FunctionTemplateDecl>(Func), I.getPair(), ActingDC,
-            TemplateArgs, ObjectType, ObjectClassification,
-            /*ThisArg=*/GetUnresolvedBase(), Args, CandidateSet,
+            TemplateArgs, ObjectType, ObjectClassification, Args, CandidateSet,
             /*SuppressUsedConversions=*/false);
       }
     }
@@ -12882,16 +12801,6 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
           << Attr->getCond()->getSourceRange() << Attr->getMessage();
       return ExprError();
     }
-
-    SmallVector<DiagnoseIfAttr *, 4> Nonfatal;
-    if (const DiagnoseIfAttr *Attr = checkArgDependentDiagnoseIf(
-            Method, Args, Nonfatal, false, MemE->getBase())) {
-      emitDiagnoseIfDiagnostic(MemE->getMemberLoc(), Attr);
-      return ExprError();
-    }
-
-    for (const auto *Attr : Nonfatal)
-      emitDiagnoseIfDiagnostic(MemE->getMemberLoc(), Attr);
   }
 
   if ((isa<CXXConstructorDecl>(CurContext) || 
@@ -12970,9 +12879,8 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
   for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end();
        Oper != OperEnd; ++Oper) {
     AddMethodCandidate(Oper.getPair(), Object.get()->getType(),
-                       Object.get()->Classify(Context),
-                       Object.get(), Args, CandidateSet,
-                       /*SuppressUserConversions=*/ false);
+                       Object.get()->Classify(Context), Args, CandidateSet,
+                       /*SuppressUserConversions=*/false);
   }
 
   // C++ [over.call.object]p2:
@@ -13247,8 +13155,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
   for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end();
        Oper != OperEnd; ++Oper) {
     AddMethodCandidate(Oper.getPair(), Base->getType(), Base->Classify(Context),
-                       Base, None, CandidateSet,
-                       /*SuppressUserConversions=*/false);
+                       None, CandidateSet, /*SuppressUserConversions=*/false);
   }
 
   bool HadMultipleCandidates = (CandidateSet.size() > 1);
@@ -13322,7 +13229,11 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
                                       Base, ResultTy, VK, OpLoc, false);
 
   if (CheckCallReturnType(Method->getReturnType(), OpLoc, TheCall, Method))
-          return ExprError();
+    return ExprError();
+
+  if (CheckFunctionCall(Method, TheCall,
+                        Method->getType()->castAs<FunctionProtoType>()))
+    return ExprError();
 
   return MaybeBindToTemporary(TheCall);
 }
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 48d8b94af153..200775756d3a 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -4996,8 +4996,12 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
     NamedDecl *Result = nullptr;
     // FIXME: If the name is a dependent name, this lookup won't necessarily
     // find it. Does that ever matter?
-    if (D->getDeclName()) {
-      DeclContext::lookup_result Found = ParentDC->lookup(D->getDeclName());
+    if (auto Name = D->getDeclName()) {
+      DeclarationNameInfo NameInfo(Name, D->getLocation());
+      Name = SubstDeclarationNameInfo(NameInfo, TemplateArgs).getName();
+      if (!Name)
+        return nullptr;
+      DeclContext::lookup_result Found = ParentDC->lookup(Name);
       Result = findInstantiationOf(Context, D, Found.begin(), Found.end());
     } else {
       // Since we don't have a name for the entity we're looking for,
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index c2aa3fef67c8..4388ad34e21b 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -8818,12 +8818,18 @@ TreeTransform<Derived>::TransformMemberExpr(MemberExpr *E) {
   // base (and therefore couldn't do the check) and a
   // nested-name-qualifier (and therefore could do the lookup).
   NamedDecl *FirstQualifierInScope = nullptr;
+  DeclarationNameInfo MemberNameInfo = E->getMemberNameInfo();
+  if (MemberNameInfo.getName()) {
+    MemberNameInfo = getDerived().TransformDeclarationNameInfo(MemberNameInfo);
+    if (!MemberNameInfo.getName())
+      return ExprError();
+  }
 
   return getDerived().RebuildMemberExpr(Base.get(), FakeOperatorLoc,
                                         E->isArrow(),
                                         QualifierLoc,
                                         TemplateKWLoc,
-                                        E->getMemberNameInfo(),
+                                        MemberNameInfo,
                                         Member,
                                         FoundDecl,
                                         (E->hasExplicitTemplateArgs()
diff --git a/test/CodeGenCXX/microsoft-abi-default-cc.cpp b/test/CodeGenCXX/microsoft-abi-default-cc.cpp
index e3ca39221e88..6259a53dbf39 100644
--- a/test/CodeGenCXX/microsoft-abi-default-cc.cpp
+++ b/test/CodeGenCXX/microsoft-abi-default-cc.cpp
@@ -45,3 +45,12 @@ void __cdecl static_baz() {}
 void static_qux() {}
 // GCABI-LABEL: define void @_Z10static_quxv
 // MSABI: define void @"\01?static_qux@@YAXXZ"
+
+namespace PR31656 {
+template <int I>
+void __cdecl callee(int args[I]);
+// GCABI-LABEL: declare void @_ZN7PR316566calleeILi1EEEvPi(
+// MSABI: declare void @"\01??$callee@$00@PR31656@@YAXQAH@Z"(
+
+void caller() { callee<1>(0); }
+}
diff --git a/test/CodeGenObjC/block-ptr-type-crash.m b/test/CodeGenObjC/block-ptr-type-crash.m
new file mode 100644
index 000000000000..385d64585897
--- /dev/null
+++ b/test/CodeGenObjC/block-ptr-type-crash.m
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -Wno-objc-root-class -fblocks -o /dev/null -triple x86_64-- -emit-llvm %s
+// REQUIRES: asserts
+// Verify there is no assertion.
+
+// rdar://30111891
+
+typedef unsigned long long uint64_t;
+typedef enum AnEnum : uint64_t AnEnum;
+enum AnEnum: uint64_t {
+    AnEnumA
+};
+
+typedef void (^BlockType)();
+@interface MyClass
+@end
+@implementation MyClass
+- (void)_doStuff {
+  struct {
+    int identifier;
+    AnEnum type;
+    BlockType handler;
+  } var = {
+    "hello",
+    AnEnumA,
+    ((void *)0)
+  };
+}
+@end
diff --git a/test/CodeGenObjC/encode-test.m b/test/CodeGenObjC/encode-test.m
index 7f0e76fc3ffd..a1f88e05250a 100644
--- a/test/CodeGenObjC/encode-test.m
+++ b/test/CodeGenObjC/encode-test.m
@@ -180,3 +180,14 @@ const char g14[] = @encode(__typeof__(*test_id));
 
 // CHECK: @g15 = constant [2 x i8] c":\00"
 const char g15[] = @encode(SEL);
+
+typedef typeof(sizeof(int)) size_t;
+size_t strlen(const char *s);
+
+// CHECK-LABEL: @test_strlen(
+// CHECK: %[[i:.*]] = alloca i32
+// CHECK: store i32 1, i32* %[[i]]
+void test_strlen() {
+  const char array[] = @encode(int);
+  int i = strlen(array);
+}
diff --git a/test/Sema/Inputs/diagnose-if-warn-system-header.h b/test/Sema/Inputs/diagnose-if-warn-system-header.h
new file mode 100644
index 000000000000..753c69d4b532
--- /dev/null
+++ b/test/Sema/Inputs/diagnose-if-warn-system-header.h
@@ -0,0 +1,11 @@
+#pragma GCC system_header
+
+inline int system_header_func(int x)
+  __attribute__((diagnose_if(x == x, "system header warning", "warning"))) // expected-note {{from 'diagnose_if' attribute}}
+{
+  return 0;
+}
+
+void test_system_header() {
+  system_header_func(0); // expected-warning {{system header warning}}
+}
diff --git a/test/Sema/diagnose_if.c b/test/Sema/diagnose_if.c
index 219e393bc0cc..27689f49b431 100644
--- a/test/Sema/diagnose_if.c
+++ b/test/Sema/diagnose_if.c
@@ -70,14 +70,14 @@ void runVariable() {
 
 #define _overloadable __attribute__((overloadable))
 
-int ovl1(const char *n) _overloadable _diagnose_if(n, "oh no", "error"); // expected-note{{oh no}}
-int ovl1(void *m) _overloadable; // expected-note{{candidate function}}
+int ovl1(const char *n) _overloadable _diagnose_if(n, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+int ovl1(void *m) _overloadable;
 
 int ovl2(const char *n) _overloadable _diagnose_if(n, "oh no", "error"); // expected-note{{candidate function}}
 int ovl2(char *m) _overloadable; // expected-note{{candidate function}}
 void overloadsYay() {
   ovl1((void *)0);
-  ovl1(""); // expected-error{{call to unavailable function}}
+  ovl1(""); // expected-error{{oh no}}
 
   ovl2((void *)0); // expected-error{{ambiguous}}
 }
@@ -150,3 +150,6 @@ void alwaysWarnWithArg(int a) _diagnose_if(1 || a, "alwaysWarn", "warning"); //
 void runAlwaysWarnWithArg(int a) {
   alwaysWarnWithArg(a); // expected-warning{{alwaysWarn}}
 }
+
+// Test that diagnose_if warnings generated in system headers are not ignored.
+#include "Inputs/diagnose-if-warn-system-header.h"
diff --git a/test/SemaCXX/destructor.cpp b/test/SemaCXX/destructor.cpp
index fe1dde5771ac..49cdc50f3c13 100644
--- a/test/SemaCXX/destructor.cpp
+++ b/test/SemaCXX/destructor.cpp
@@ -431,3 +431,23 @@ class Invalid {
 
 // The constructor definition should not have errors
 Invalid::~Invalid() {}
+
+namespace PR30361 {
+template <typename T>
+struct C1 {
+  ~C1() {}
+  operator C1<T>* () { return nullptr; }
+  void foo1();
+};
+
+template<typename T>
+void C1<T>::foo1() {
+  C1::operator C1<T>*();
+  C1::~C1();
+}
+
+void foo1() {
+  C1<int> x;
+  x.foo1();
+}
+}
diff --git a/test/SemaCXX/diagnose_if.cpp b/test/SemaCXX/diagnose_if.cpp
index f97b79d03529..02b3620e7692 100644
--- a/test/SemaCXX/diagnose_if.cpp
+++ b/test/SemaCXX/diagnose_if.cpp
@@ -2,6 +2,8 @@
 
 #define _diagnose_if(...) __attribute__((diagnose_if(__VA_ARGS__)))
 
+using size_t = decltype(sizeof(int));
+
 namespace type_dependent {
 template <typename T>
 void neverok() _diagnose_if(!T(), "oh no", "error") {} // expected-note 4{{from 'diagnose_if'}}
@@ -51,14 +53,14 @@ void runAll() {
 }
 
 template <typename T>
-void errorIf(T a) _diagnose_if(T() != a, "oh no", "error") {} // expected-note {{candidate disabled: oh no}}
+void errorIf(T a) _diagnose_if(T() != a, "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
 
 template <typename T>
-void warnIf(T a) _diagnose_if(T() != a, "oh no", "warning") {} // expected-note {{from 'diagnose_if'}}
+void warnIf(T a) _diagnose_if(T() != a, "oh no", "warning") {} // expected-note{{from 'diagnose_if'}}
 
 void runIf() {
   errorIf(0);
-  errorIf(1); // expected-error{{call to unavailable function}}
+  errorIf(1); // expected-error{{oh no}}
 
   warnIf(0);
   warnIf(1); // expected-warning{{oh no}}
@@ -114,14 +116,14 @@ void runAll() {
 }
 
 template <int N>
-void errorIf(int a) _diagnose_if(N != a, "oh no", "error") {} // expected-note {{candidate disabled: oh no}}
+void errorIf(int a) _diagnose_if(N != a, "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
 
 template <int N>
-void warnIf(int a) _diagnose_if(N != a, "oh no", "warning") {} // expected-note {{from 'diagnose_if'}}
+void warnIf(int a) _diagnose_if(N != a, "oh no", "warning") {} // expected-note{{from 'diagnose_if'}}
 
 void runIf() {
   errorIf<0>(0);
-  errorIf<0>(1); // expected-error{{call to unavailable function}}
+  errorIf<0>(1); // expected-error{{oh no}}
 
   warnIf<0>(0);
   warnIf<0>(1); // expected-warning{{oh no}}
@@ -135,8 +137,8 @@ void foo(short);
 void bar(int);
 void bar(short) _diagnose_if(1, "oh no", "error");
 
-void fooArg(int a) _diagnose_if(a, "oh no", "error"); // expected-note{{candidate disabled: oh no}}
-void fooArg(short); // expected-note{{candidate function}}
+void fooArg(int a) _diagnose_if(a, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+void fooArg(short);
 
 void barArg(int);
 void barArg(short a) _diagnose_if(a, "oh no", "error");
@@ -145,7 +147,7 @@ void runAll() {
   foo(1); // expected-error{{oh no}}
   bar(1);
 
-  fooArg(1); // expected-error{{call to unavailable function}}
+  fooArg(1); // expected-error{{oh no}}
   barArg(1);
 
   auto p = foo; // expected-error{{incompatible initializer of type '<overloaded function type>'}}
@@ -188,11 +190,11 @@ struct Errors {
   void foo(int i) _diagnose_if(i, "bad i", "error"); // expected-note{{from 'diagnose_if'}}
   void bar(int i) _diagnose_if(i != T(), "bad i", "error"); // expected-note{{from 'diagnose_if'}}
 
-  void fooOvl(int i) _diagnose_if(i, "int bad i", "error"); // expected-note 2{{int bad i}}
-  void fooOvl(short i) _diagnose_if(i, "short bad i", "error"); // expected-note 2{{short bad i}}
+  void fooOvl(int i) _diagnose_if(i, "int bad i", "error"); // expected-note{{from 'diagnose_if'}}
+  void fooOvl(short i) _diagnose_if(i, "short bad i", "error"); // expected-note{{from 'diagnose_if'}}
 
-  void barOvl(int i) _diagnose_if(i != T(), "int bad i", "error"); // expected-note 2{{int bad i}}
-  void barOvl(short i) _diagnose_if(i != T(), "short bad i", "error"); // expected-note 2{{short bad i}}
+  void barOvl(int i) _diagnose_if(i != T(), "int bad i", "error"); // expected-note{{from 'diagnose_if'}}
+  void barOvl(short i) _diagnose_if(i != T(), "short bad i", "error"); // expected-note{{from 'diagnose_if'}}
 };
 
 void runErrors() {
@@ -203,14 +205,14 @@ void runErrors() {
   Errors<int>().bar(1); // expected-error{{bad i}}
 
   Errors<int>().fooOvl(0);
-  Errors<int>().fooOvl(1); // expected-error{{call to unavailable}}
+  Errors<int>().fooOvl(1); // expected-error{{int bad i}}
   Errors<int>().fooOvl(short(0));
-  Errors<int>().fooOvl(short(1)); // expected-error{{call to unavailable}}
+  Errors<int>().fooOvl(short(1)); // expected-error{{short bad i}}
 
   Errors<int>().barOvl(0);
-  Errors<int>().barOvl(1); // expected-error{{call to unavailable}}
+  Errors<int>().barOvl(1); // expected-error{{int bad i}}
   Errors<int>().barOvl(short(0));
-  Errors<int>().barOvl(short(1)); // expected-error{{call to unavailable}}
+  Errors<int>().barOvl(short(1)); // expected-error{{short bad i}}
 }
 
 template <typename T>
@@ -275,8 +277,8 @@ namespace late_constexpr {
 constexpr int foo();
 constexpr int foo(int a);
 
-void bar() _diagnose_if(foo(), "bad foo", "error"); // expected-note{{from 'diagnose_if'}} expected-note{{not viable: requires 0 arguments}}
-void bar(int a) _diagnose_if(foo(a), "bad foo", "error"); // expected-note{{bad foo}}
+void bar() _diagnose_if(foo(), "bad foo", "error"); // expected-note{{from 'diagnose_if'}}
+void bar(int a) _diagnose_if(foo(a), "bad foo", "error"); // expected-note{{from 'diagnose_if'}}
 
 void early() {
   bar();
@@ -290,7 +292,7 @@ constexpr int foo(int a) { return a; }
 void late() {
   bar(); // expected-error{{bad foo}}
   bar(0);
-  bar(1); // expected-error{{call to unavailable function}}
+  bar(1); // expected-error{{bad foo}}
 }
 }
 
@@ -301,11 +303,11 @@ struct Foo {
   constexpr bool isFooable() const { return i; }
 
   void go() const _diagnose_if(isFooable(), "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
-  operator int() const _diagnose_if(isFooable(), "oh no", "error") { return 1; } // expected-note{{oh no}}
+  operator int() const _diagnose_if(isFooable(), "oh no", "error") { return 1; } // expected-note{{from 'diagnose_if'}}
 
-  void go2() const _diagnose_if(isFooable(), "oh no", "error") // expected-note{{oh no}}
+  void go2() const _diagnose_if(isFooable(), "oh no", "error") // expected-note{{from 'diagnose_if'}}
       __attribute__((enable_if(true, ""))) {}
-  void go2() const _diagnose_if(isFooable(), "oh no", "error") {} // expected-note{{oh no}}
+  void go2() const _diagnose_if(isFooable(), "oh no", "error") {}
 
   constexpr int go3() const _diagnose_if(isFooable(), "oh no", "error")
       __attribute__((enable_if(true, ""))) {
@@ -326,20 +328,20 @@ struct Foo {
   }
 };
 
-void go(const Foo &f) _diagnose_if(f.isFooable(), "oh no", "error") {} // expected-note{{oh no}}
+void go(const Foo &f) _diagnose_if(f.isFooable(), "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
 
 void run() {
   Foo(0).go();
   Foo(1).go(); // expected-error{{oh no}}
 
   (void)int(Foo(0));
-  (void)int(Foo(1)); // expected-error{{uses deleted function}}
+  (void)int(Foo(1)); // expected-error{{oh no}}
 
   Foo(0).go2();
-  Foo(1).go2(); // expected-error{{call to unavailable member function}}
+  Foo(1).go2(); // expected-error{{oh no}}
 
   go(Foo(0));
-  go(Foo(1)); // expected-error{{call to unavailable function}}
+  go(Foo(1)); // expected-error{{oh no}}
 }
 }
 
@@ -349,17 +351,17 @@ struct Foo {
   constexpr Foo(int i): i(i) {}
   constexpr bool bad() const { return i; }
 
-  template <typename T> T getVal() _diagnose_if(bad(), "oh no", "error") { // expected-note{{oh no}}
+  template <typename T> T getVal() _diagnose_if(bad(), "oh no", "error") { // expected-note{{from 'diagnose_if'}}
     return T();
   }
 
   template <typename T>
-  constexpr T getVal2() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{oh no}}
+  constexpr T getVal2() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{from 'diagnose_if'}}
     return T();
   }
 
   template <typename T>
-  constexpr operator T() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{oh no}}
+  constexpr operator T() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{from 'diagnose_if'}}
     return T();
   }
 
@@ -369,13 +371,13 @@ struct Foo {
 
 void run() {
   Foo(0).getVal<int>();
-  Foo(1).getVal<int>(); // expected-error{{call to unavailable member function}}
+  Foo(1).getVal<int>(); // expected-error{{oh no}}
 
   Foo(0).getVal2<int>();
-  Foo(1).getVal2<int>(); // expected-error{{call to unavailable member function}}
+  Foo(1).getVal2<int>(); // expected-error{{oh no}}
 
   (void)int(Foo(0));
-  (void)int(Foo(1)); // expected-error{{uses deleted function}}
+  (void)int(Foo(1)); // expected-error{{oh no}}
 }
 }
 
@@ -385,18 +387,18 @@ struct Foo {
   int i;
   constexpr Foo(int i): i(i) {}
   constexpr bool bad() const { return i; }
-  const Bar *operator->() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{oh no}}
+  const Bar *operator->() const _diagnose_if(bad(), "oh no", "error") { // expected-note{{from 'diagnose_if'}}
     return nullptr;
   }
-  void operator()() const _diagnose_if(bad(), "oh no", "error") {} // expected-note{{oh no}}
+  void operator()() const _diagnose_if(bad(), "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
 };
 
 struct ParenOverload {
   int i;
   constexpr ParenOverload(int i): i(i) {}
   constexpr bool bad() const { return i; }
-  void operator()(double) const _diagnose_if(bad(), "oh no", "error") {} // expected-note 2{{oh no}}
-  void operator()(int) const _diagnose_if(bad(), "oh no", "error") {} // expected-note 2{{oh no}}
+  void operator()(double) const _diagnose_if(bad(), "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
+  void operator()(int) const _diagnose_if(bad(), "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
 };
 
 struct ParenTemplate {
@@ -404,33 +406,70 @@ struct ParenTemplate {
   constexpr ParenTemplate(int i): i(i) {}
   constexpr bool bad() const { return i; }
   template <typename T>
-  void operator()(T) const _diagnose_if(bad(), "oh no", "error") {} // expected-note 2{{oh no}}
+  void operator()(T) const _diagnose_if(bad(), "oh no", "error") {} // expected-note 2{{from 'diagnose_if'}}
 };
 
 void run() {
   (void)Foo(0)->j;
-  (void)Foo(1)->j; // expected-error{{selected unavailable operator '->'}}
+  (void)Foo(1)->j; // expected-error{{oh no}}
 
   Foo(0)();
-  Foo(1)(); // expected-error{{unavailable function call operator}}
+  Foo(1)(); // expected-error{{oh no}}
 
   ParenOverload(0)(1);
   ParenOverload(0)(1.);
 
-  ParenOverload(1)(1); // expected-error{{unavailable function call operator}}
-  ParenOverload(1)(1.); // expected-error{{unavailable function call operator}}
+  ParenOverload(1)(1); // expected-error{{oh no}}
+  ParenOverload(1)(1.); // expected-error{{oh no}}
 
   ParenTemplate(0)(1);
   ParenTemplate(0)(1.);
 
-  ParenTemplate(1)(1); // expected-error{{unavailable function call operator}}
-  ParenTemplate(1)(1.); // expected-error{{unavailable function call operator}}
+  ParenTemplate(1)(1); // expected-error{{oh no}}
+  ParenTemplate(1)(1.); // expected-error{{oh no}}
 }
 
 void runLambda() {
-  auto L1 = [](int i) _diagnose_if(i, "oh no", "error") {}; // expected-note{{oh no}} expected-note{{conversion candidate}}
+  auto L1 = [](int i) _diagnose_if(i, "oh no", "error") {}; // expected-note{{from 'diagnose_if'}}
   L1(0);
-  L1(1); // expected-error{{call to unavailable function call}}
+  L1(1); // expected-error{{oh no}}
+}
+
+struct Brackets {
+  int i;
+  constexpr Brackets(int i): i(i) {}
+  void operator[](int) _diagnose_if(i == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(i == 2, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+};
+
+void runBrackets(int i) {
+  Brackets{0}[i];
+  Brackets{1}[i]; // expected-warning{{oh no}}
+  Brackets{2}[i]; // expected-error{{oh no}}
+}
+
+struct Unary {
+  int i;
+  constexpr Unary(int i): i(i) {}
+  void operator+() _diagnose_if(i == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(i == 2, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+};
+
+void runUnary() {
+  +Unary{0};
+  +Unary{1}; // expected-warning{{oh no}}
+  +Unary{2}; // expected-error{{oh no}}
+}
+
+struct PostInc {
+  void operator++(int i) _diagnose_if(i == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(i == 2, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+};
+
+void runPostInc() {
+  PostInc{}++;
+  PostInc{}.operator++(1); // expected-warning{{oh no}}
+  PostInc{}.operator++(2); // expected-error{{oh no}}
 }
 }
 
@@ -439,22 +478,192 @@ struct Foo {
   int I;
   constexpr Foo(int I): I(I) {}
 
-  constexpr const Foo &operator=(const Foo &) const // expected-note 2{{disabled: oh no}}
-      _diagnose_if(I, "oh no", "error") {
+  constexpr const Foo &operator=(const Foo &) const
+      _diagnose_if(I, "oh no", "error") {  // expected-note{{from 'diagnose_if'}}
     return *this;
   }
 
-  constexpr const Foo &operator=(const Foo &&) const // expected-note{{disabled: oh no}} expected-note{{no known conversion}}
-      _diagnose_if(I, "oh no", "error") {
+  constexpr const Foo &operator=(const Foo &&) const
+      _diagnose_if(I, "oh no", "error") { // expected-note{{from 'diagnose_if'}}
     return *this;
   }
 };
 
+struct Bar {
+  int I;
+  constexpr Bar(int I) _diagnose_if(I == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(I == 2, "oh no", "error"): I(I) {} // expected-note{{from 'diagnose_if'}}
+};
+
 void run() {
   constexpr Foo F{0};
   constexpr Foo F2{1};
 
-  F2 = F; // expected-error{{selected unavailable operator}}
-  F2 = Foo{2}; // expected-error{{selected unavailable operator}}
+  F2 = F; // expected-error{{oh no}}
+  F2 = Foo{2}; // expected-error{{oh no}}
+
+  Bar{0};
+  Bar{1}; // expected-warning{{oh no}}
+  Bar{2}; // expected-error{{oh no}}
+}
+}
+
+namespace ref_init {
+struct Bar {};
+struct Baz {};
+struct Foo {
+  int i;
+  constexpr Foo(int i): i(i) {}
+  operator const Bar &() const _diagnose_if(i, "oh no", "warning"); // expected-note{{from 'diagnose_if'}}
+  operator const Baz &() const _diagnose_if(i, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+};
+void fooBar(const Bar &b);
+void fooBaz(const Baz &b);
+
+void run() {
+  fooBar(Foo{0});
+  fooBar(Foo{1}); // expected-warning{{oh no}}
+  fooBaz(Foo{0});
+  fooBaz(Foo{1}); // expected-error{{oh no}}
+}
+}
+
+namespace udl {
+void operator""_fn(char c)_diagnose_if(c == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(c == 2, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+
+void run() {
+  '\0'_fn;
+  '\1'_fn; // expected-warning{{oh no}}
+  '\2'_fn; // expected-error{{oh no}}
+}
+}
+
+namespace PR31638 {
+struct String {
+  String(char const* __s) _diagnose_if(__s == nullptr, "oh no ptr", "warning"); // expected-note{{from 'diagnose_if'}}
+  String(int __s) _diagnose_if(__s != 0, "oh no int", "warning"); // expected-note{{from 'diagnose_if'}}
+};
+
+void run() {
+  String s(nullptr); // expected-warning{{oh no ptr}}
+  String ss(42); // expected-warning{{oh no int}}
+}
+}
+
+namespace PR31639 {
+struct Foo {
+  Foo(int I) __attribute__((diagnose_if(I, "oh no", "error"))); // expected-note{{from 'diagnose_if'}}
+};
+
+void bar() { Foo f(1); } // expected-error{{oh no}}
+}
+
+namespace user_defined_conversion {
+struct Foo {
+  int i;
+  constexpr Foo(int i): i(i) {}
+  operator size_t() const _diagnose_if(i == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+      _diagnose_if(i == 2, "oh no", "error"); // expected-note{{from 'diagnose_if'}}
+};
+
+void run() {
+  // `new T[N]`, where N is implicitly convertible to size_t, calls
+  // PerformImplicitConversion directly. This lets us test the diagnostic logic
+  // in PerformImplicitConversion.
+  new int[Foo{0}];
+  new int[Foo{1}]; // expected-warning{{oh no}}
+  new int[Foo{2}]; // expected-error{{oh no}}
+}
+}
+
+namespace std {
+  template <typename T>
+  struct initializer_list {
+    const T *ptr;
+    size_t elems;
+
+    constexpr size_t size() const { return elems; }
+  };
+}
+
+namespace initializer_lists {
+struct Foo {
+  Foo(std::initializer_list<int> l)
+    _diagnose_if(l.size() == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+    _diagnose_if(l.size() == 2, "oh no", "error") {} // expected-note{{from 'diagnose_if'}}
+};
+
+void run() {
+  Foo{std::initializer_list<int>{}};
+  Foo{std::initializer_list<int>{1}}; // expected-warning{{oh no}}
+  Foo{std::initializer_list<int>{1, 2}}; // expected-error{{oh no}}
+  Foo{std::initializer_list<int>{1, 2, 3}};
+}
+}
+
+namespace range_for_loop {
+  namespace adl {
+    struct Foo {
+      int i;
+      constexpr Foo(int i): i(i) {}
+    };
+    void **begin(const Foo &f) _diagnose_if(f.i, "oh no", "warning");
+    void **end(const Foo &f) _diagnose_if(f.i, "oh no", "warning");
+
+    struct Bar {
+      int i;
+      constexpr Bar(int i): i(i) {}
+    };
+    void **begin(const Bar &b) _diagnose_if(b.i, "oh no", "error");
+    void **end(const Bar &b) _diagnose_if(b.i, "oh no", "error");
+  }
+
+  void run() {
+    for (void *p : adl::Foo(0)) {}
+    // FIXME: This should emit diagnostics. It seems that our constexpr
+    // evaluator isn't able to evaluate `adl::Foo(1)` as a constant, though.
+    for (void *p : adl::Foo(1)) {}
+
+    for (void *p : adl::Bar(0)) {}
+    // FIXME: Same thing.
+    for (void *p : adl::Bar(1)) {}
+  }
+}
+
+namespace operator_new {
+struct Foo {
+  int j;
+  static void *operator new(size_t i) _diagnose_if(i, "oh no", "warning");
+};
+
+struct Bar {
+  int j;
+  static void *operator new(size_t i) _diagnose_if(!i, "oh no", "warning");
+};
+
+void run() {
+  // FIXME: This should emit a diagnostic.
+  new Foo();
+  // This is here because we sometimes pass a dummy argument `operator new`. We
+  // should ignore this, rather than complaining about it.
+  new Bar();
+}
+}
+
+namespace contextual_implicit_conv {
+struct Foo {
+  int i;
+  constexpr Foo(int i): i(i) {}
+  constexpr operator int() const _diagnose_if(i == 1, "oh no", "warning") // expected-note{{from 'diagnose_if'}}
+      _diagnose_if(i == 2, "oh no", "error") { // expected-note{{from 'diagnose_if'}}
+    return i;
+  }
+};
+
+void run() {
+  switch (constexpr Foo i = 0) { default: break; }
+  switch (constexpr Foo i = 1) { default: break; } // expected-warning{{oh no}}
+  switch (constexpr Foo i = 2) { default: break; } // expected-error{{oh no}}
 }
 }

From 15de40171694cbc46a4d1aeb4cd13e9a748fdede Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 1 Feb 2017 21:35:12 +0000
Subject: [PATCH 3/4] Vendor import of libc++ release_40 branch r293807:
 https://llvm.org/svn/llvm-project/libcxx/branches/release_40@293807

---
 include/mutex | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/mutex b/include/mutex
index 3b878c642cce..8526533f1402 100644
--- a/include/mutex
+++ b/include/mutex
@@ -559,7 +559,6 @@ public:
 #endif
 
 template <class _Fp>
-inline _LIBCPP_INLINE_VISIBILITY
 void
 __call_once_proxy(void* __vp)
 {

From 63b9abd1dbe002d940a818f51dd9d6e585e41c84 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Wed, 1 Feb 2017 21:35:17 +0000
Subject: [PATCH 4/4] Vendor import of lld release_40 branch r293807:
 https://llvm.org/svn/llvm-project/lld/branches/release_40@293807

---
 ELF/InputFiles.h           |   4 --
 ELF/InputSection.cpp       |   3 +-
 ELF/SyntheticSections.cpp  |  80 ++++++++++++++++++++++---------------
 ELF/SyntheticSections.h    |  13 +++---
 ELF/Writer.cpp             |  12 ++----
 test/ELF/Inputs/dtrace-r.o | Bin 0 -> 624 bytes
 test/ELF/aarch64-relro.s   |  14 +++++++
 test/ELF/basic-mips.s      |   2 +-
 test/ELF/basic-ppc.s       |   2 +-
 test/ELF/dtrace-r.test     |   8 ++++
 10 files changed, 85 insertions(+), 53 deletions(-)
 create mode 100644 test/ELF/Inputs/dtrace-r.o
 create mode 100644 test/ELF/aarch64-relro.s
 create mode 100644 test/ELF/dtrace-r.test

diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h
index 73dda7b566b8..95888061d877 100644
--- a/ELF/InputFiles.h
+++ b/ELF/InputFiles.h
@@ -180,10 +180,6 @@ template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> {
   // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
   uint32_t MipsGp0 = 0;
 
-  // The number is the offset in the string table. It will be used as the
-  // st_name of the symbol.
-  std::vector<std::pair<const DefinedRegular<ELFT> *, unsigned>> KeptLocalSyms;
-
   // Name of source file obtained from STT_FILE symbol value,
   // or empty string if there is no such symbol in object file
   // symbol table.
diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp
index 358004248373..6b1e92891b98 100644
--- a/ELF/InputSection.cpp
+++ b/ELF/InputSection.cpp
@@ -246,7 +246,8 @@ void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
     if (Config->Rela)
       P->r_addend = getAddend<ELFT>(Rel);
     P->r_offset = RelocatedSection->getOffset(Rel.r_offset);
-    P->setSymbolAndType(Body.DynsymIndex, Type, Config->Mips64EL);
+    P->setSymbolAndType(In<ELFT>::SymTab->getSymbolIndex(&Body), Type,
+                        Config->Mips64EL);
   }
 }
 
diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp
index f09b60b2b494..204b5993a62a 100644
--- a/ELF/SyntheticSections.cpp
+++ b/ELF/SyntheticSections.cpp
@@ -1065,22 +1065,21 @@ template <class ELFT> void SymbolTableSection<ELFT>::finalize() {
   this->OutSec->Info = this->Info = NumLocals + 1;
   this->OutSec->Entsize = this->Entsize;
 
-  if (Config->Relocatable) {
-    size_t I = NumLocals;
-    for (const SymbolTableEntry &S : Symbols)
-      S.Symbol->DynsymIndex = ++I;
+  if (Config->Relocatable)
+    return;
+
+  if (!StrTabSec.isDynamic()) {
+    auto GlobBegin = Symbols.begin() + NumLocals;
+    auto It = std::stable_partition(
+        GlobBegin, Symbols.end(), [](const SymbolTableEntry &S) {
+          return S.Symbol->symbol()->computeBinding() == STB_LOCAL;
+        });
+    // update sh_info with number of Global symbols output with computed
+    // binding of STB_LOCAL
+    this->OutSec->Info = this->Info = 1 + It - Symbols.begin();
     return;
   }
 
-  if (!StrTabSec.isDynamic()) {
-    std::stable_sort(
-        Symbols.begin(), Symbols.end(),
-        [](const SymbolTableEntry &L, const SymbolTableEntry &R) {
-          return L.Symbol->symbol()->computeBinding() == STB_LOCAL &&
-                 R.Symbol->symbol()->computeBinding() != STB_LOCAL;
-        });
-    return;
-  }
   if (In<ELFT>::GnuHashTab)
     // NB: It also sorts Symbols to meet the GNU hash table requirements.
     In<ELFT>::GnuHashTab->addSymbols(Symbols);
@@ -1094,10 +1093,25 @@ template <class ELFT> void SymbolTableSection<ELFT>::finalize() {
     S.Symbol->DynsymIndex = ++I;
 }
 
-template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) {
+template <class ELFT> void SymbolTableSection<ELFT>::addGlobal(SymbolBody *B) {
   Symbols.push_back({B, StrTabSec.addString(B->getName(), false)});
 }
 
+template <class ELFT> void SymbolTableSection<ELFT>::addLocal(SymbolBody *B) {
+  assert(!StrTabSec.isDynamic());
+  ++NumLocals;
+  Symbols.push_back({B, StrTabSec.addString(B->getName())});
+}
+
+template <class ELFT>
+size_t SymbolTableSection<ELFT>::getSymbolIndex(SymbolBody *Body) {
+  auto I = llvm::find_if(
+      Symbols, [&](const SymbolTableEntry &E) { return E.Symbol == Body; });
+  if (I == Symbols.end())
+    return 0;
+  return I - Symbols.begin() + 1;
+}
+
 template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
   Buf += sizeof(Elf_Sym);
 
@@ -1113,26 +1127,24 @@ template <class ELFT>
 void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) {
   // Iterate over all input object files to copy their local symbols
   // to the output symbol table pointed by Buf.
-  for (ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) {
-    for (const std::pair<const DefinedRegular<ELFT> *, size_t> &P :
-         File->KeptLocalSyms) {
-      const DefinedRegular<ELFT> &Body = *P.first;
-      InputSectionBase<ELFT> *Section = Body.Section;
-      auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
 
-      if (!Section) {
-        ESym->st_shndx = SHN_ABS;
-        ESym->st_value = Body.Value;
-      } else {
-        const OutputSectionBase *OutSec = Section->OutSec;
-        ESym->st_shndx = OutSec->SectionIndex;
-        ESym->st_value = OutSec->Addr + Section->getOffset(Body);
-      }
-      ESym->st_name = P.second;
-      ESym->st_size = Body.template getSize<ELFT>();
-      ESym->setBindingAndType(STB_LOCAL, Body.Type);
-      Buf += sizeof(*ESym);
+  for (auto I = Symbols.begin(); I != Symbols.begin() + NumLocals; ++I) {
+    const DefinedRegular<ELFT> &Body = *cast<DefinedRegular<ELFT>>(I->Symbol);
+    InputSectionBase<ELFT> *Section = Body.Section;
+    auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
+
+    if (!Section) {
+      ESym->st_shndx = SHN_ABS;
+      ESym->st_value = Body.Value;
+    } else {
+      const OutputSectionBase *OutSec = Section->OutSec;
+      ESym->st_shndx = OutSec->SectionIndex;
+      ESym->st_value = OutSec->Addr + Section->getOffset(Body);
     }
+    ESym->st_name = I->StrTabOffset;
+    ESym->st_size = Body.template getSize<ELFT>();
+    ESym->setBindingAndType(STB_LOCAL, Body.Type);
+    Buf += sizeof(*ESym);
   }
 }
 
@@ -1141,7 +1153,9 @@ void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) {
   // Write the internal symbol table contents to the output symbol table
   // pointed by Buf.
   auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
-  for (const SymbolTableEntry &S : Symbols) {
+
+  for (auto I = Symbols.begin() + NumLocals; I != Symbols.end(); ++I) {
+    const SymbolTableEntry &S = *I;
     SymbolBody *Body = S.Symbol;
     size_t StrOff = S.StrTabOffset;
 
diff --git a/ELF/SyntheticSections.h b/ELF/SyntheticSections.h
index f7e891ec3a66..df67e079ad0e 100644
--- a/ELF/SyntheticSections.h
+++ b/ELF/SyntheticSections.h
@@ -366,23 +366,26 @@ class SymbolTableSection final : public SyntheticSection<ELFT> {
   void finalize() override;
   void writeTo(uint8_t *Buf) override;
   size_t getSize() const override { return getNumSymbols() * sizeof(Elf_Sym); }
-  void addSymbol(SymbolBody *Body);
+  void addGlobal(SymbolBody *Body);
+  void addLocal(SymbolBody *Body);
   StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; }
-  unsigned getNumSymbols() const { return NumLocals + Symbols.size() + 1; }
+  unsigned getNumSymbols() const { return Symbols.size() + 1; }
+  size_t getSymbolIndex(SymbolBody *Body);
 
   ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; }
 
   static const OutputSectionBase *getOutputSection(SymbolBody *Sym);
 
-  unsigned NumLocals = 0;
-  StringTableSection<ELFT> &StrTabSec;
-
 private:
   void writeLocalSymbols(uint8_t *&Buf);
   void writeGlobalSymbols(uint8_t *Buf);
 
   // A vector of symbols and their string table offsets.
   std::vector<SymbolTableEntry> Symbols;
+
+  StringTableSection<ELFT> &StrTabSec;
+
+  unsigned NumLocals = 0;
 };
 
 // Outputs GNU Hash section. For detailed explanation see:
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 4f2f91993f8d..f00fb6d11ea5 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -455,11 +455,7 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
       InputSectionBase<ELFT> *Sec = DR->Section;
       if (!shouldKeepInSymtab<ELFT>(Sec, B->getName(), *B))
         continue;
-      ++In<ELFT>::SymTab->NumLocals;
-      if (Config->Relocatable)
-        B->DynsymIndex = In<ELFT>::SymTab->NumLocals;
-      F->KeptLocalSyms.push_back(std::make_pair(
-          DR, In<ELFT>::SymTab->StrTabSec.addString(B->getName())));
+      In<ELFT>::SymTab->addLocal(B);
     }
   }
 }
@@ -1024,10 +1020,10 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     if (!includeInSymtab<ELFT>(*Body))
       continue;
     if (In<ELFT>::SymTab)
-      In<ELFT>::SymTab->addSymbol(Body);
+      In<ELFT>::SymTab->addGlobal(Body);
 
     if (In<ELFT>::DynSymTab && S->includeInDynsym()) {
-      In<ELFT>::DynSymTab->addSymbol(Body);
+      In<ELFT>::DynSymTab->addGlobal(Body);
       if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(Body))
         if (SS->file()->isNeeded())
           In<ELFT>::VerNeed->addSymbol(SS);
@@ -1466,7 +1462,7 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() {
       // The glibc dynamic loader rounds the size down, so we need to round up
       // to protect the last page. This is a no-op on FreeBSD which always
       // rounds up.
-      P.p_memsz = alignTo(P.p_memsz, Config->MaxPageSize);
+      P.p_memsz = alignTo(P.p_memsz, Target->PageSize);
     }
 
     // The TLS pointer goes after PT_TLS. At least glibc will align it,
diff --git a/test/ELF/Inputs/dtrace-r.o b/test/ELF/Inputs/dtrace-r.o
new file mode 100644
index 0000000000000000000000000000000000000000..ce742de3bd2be7140052612b7d94cdc52f4210a5
GIT binary patch
literal 624
zcmb<-^>JfjWaMOk0!9Wq21XbMi8eqIbO4JxFtCDD>J_EtB<hu<R+KQr$ETDOB_^k)
z<|QWOq^86dl_pij$HylY6(v^cCKsh9mZSpJ7nc;3Bqjmr%3Kg#lmt{z8K0M09-o|8
zkjS6{F~iW<(9G1xQV(6)!}!4A2}tfDmgxvo$OMEizzSvX0BHe+kN;ufFgg&(5Mshq
z50fXCHUw&9#-iB)Dpr6dhZO8!88#pZ5(Cl5U;~r`^A}8x1;&8VAid~fAgLWtc?l>D
bQUmfm2b#DgR2(V{r!GKQ2~Zkj4>tV(9%VCv

literal 0
HcmV?d00001

diff --git a/test/ELF/aarch64-relro.s b/test/ELF/aarch64-relro.s
new file mode 100644
index 000000000000..3ec19d73d683
--- /dev/null
+++ b/test/ELF/aarch64-relro.s
@@ -0,0 +1,14 @@
+# REQUIRES: aarch64
+# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %t
+# RUN: ld.lld %t -o %t2
+# RUN: llvm-readobj -program-headers %t2 | FileCheck %s
+
+# CHECK:      Type: PT_GNU_RELRO
+# CHECK-NEXT: Offset:
+# CHECK-NEXT: VirtualAddress:
+# CHECK-NEXT: PhysicalAddress:
+# CHECK-NEXT: FileSize:
+# CHECK-NEXT: MemSize: 4096
+
+.section .data.rel.ro,"aw",%progbits
+.byte 1
diff --git a/test/ELF/basic-mips.s b/test/ELF/basic-mips.s
index 67b58f8f028c..57181caf4eaf 100644
--- a/test/ELF/basic-mips.s
+++ b/test/ELF/basic-mips.s
@@ -176,7 +176,7 @@ __start:
 # CHECK-NEXT:     Offset: 0x20010
 # CHECK-NEXT:     Size: 48
 # CHECK-NEXT:     Link: 10
-# CHECK-NEXT:     Info: 1
+# CHECK-NEXT:     Info: 2
 # CHECK-NEXT:     AddressAlignment: 4
 # CHECK-NEXT:     EntrySize: 16
 # CHECK-NEXT:   }
diff --git a/test/ELF/basic-ppc.s b/test/ELF/basic-ppc.s
index e08c7a32eb7c..aae81fe2ac0e 100644
--- a/test/ELF/basic-ppc.s
+++ b/test/ELF/basic-ppc.s
@@ -178,7 +178,7 @@
 // CHECK-NEXT:     Offset: 0x2038
 // CHECK-NEXT:     Size: 32
 // CHECK-NEXT:     Link: 9
-// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     Info: 2
 // CHECK-NEXT:     AddressAlignment: 4
 // CHECK-NEXT:     EntrySize: 16
 // CHECK-NEXT:     SectionData (
diff --git a/test/ELF/dtrace-r.test b/test/ELF/dtrace-r.test
new file mode 100644
index 000000000000..2b6d885d4c89
--- /dev/null
+++ b/test/ELF/dtrace-r.test
@@ -0,0 +1,8 @@
+RUN: ld.lld -r -o %t.o %p/Inputs/dtrace-r.o
+RUN: llvm-readobj -r %t.o | FileCheck %s
+
+CHECK:      Relocations [
+CHECK-NEXT:   Section ({{.*}}) .rela.text {
+CHECK-NEXT:     0x0 R_X86_64_NONE - 0x0
+CHECK-NEXT:   }
+CHECK-NEXT: ]