Vendor import of llvm release_40 branch r293807:

https://llvm.org/svn/llvm-project/llvm/branches/release_40@293807
2017-02-01 21:34:47 +00:00 · 2017-02-01 21:34:47 +00:00 · 823f87a1f8
commit 823f87a1f8
parent aac4ca60bc
80 changed files with 2683 additions and 1511 deletions
--- a/cmake/modules/DetermineGCCCompatible.cmake
+++ b/cmake/modules/DetermineGCCCompatible.cmake
@ -7,5 +7,7 @@ if(NOT DEFINED LLVM_COMPILER_IS_GCC_COMPATIBLE)
    set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF)
  elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
    set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
+  elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" )
+    set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
  endif()
 endif()
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@ -480,6 +480,12 @@ class AsmPrinter : public MachineFunctionPass {
  /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
  virtual unsigned getISAEncoding() { return 0; }

+  /// Emit the directive and value for debug thread local expression
+  ///
+  /// \p Value - The value to emit.
+  /// \p Size - The size of the integer (in bytes) to emit.
+  virtual void EmitDebugValue(const MCExpr *Value, unsigned Size) const;
+
  //===------------------------------------------------------------------===//
  // Dwarf Lowering Routines
  //===------------------------------------------------------------------===//
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@ -305,7 +305,7 @@ class SelectionDAGISel : public MachineFunctionPass {
  std::vector<unsigned> OpcodeOffset;

  void UpdateChains(SDNode *NodeToMatch, SDValue InputChain,
-                    const SmallVectorImpl<SDNode *> &ChainNodesMatched,
+                    SmallVectorImpl<SDNode *> &ChainNodesMatched,
                    bool isMorphNodeTo);
 };

--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -567,6 +567,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
  OutStreamer->AddBlankLine();
 }

+/// Emit the directive and value for debug thread local expression
+///
+/// \p Value - The value to emit.
+/// \p Size - The size of the integer (in bytes) to emit.
+void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+                                      unsigned Size) const {
+  OutStreamer->EmitValue(Value, Size);
+}
+
 /// EmitFunctionHeader - This method emits the header for the current
 /// function.
 void AsmPrinter::EmitFunctionHeader() {
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@ -484,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const {
 /// EmitValue - Emit expression value.
 ///
 void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
-  AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
+  AP->EmitDebugValue(Expr, SizeOf(AP, Form));
 }

 /// SizeOf - Determine size of expression value in bytes.
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
 /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
 static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned MaxFactor) {
+                               unsigned MaxFactor, unsigned OpNumElts) {
  unsigned NumElts = Mask.size();
  if (NumElts < 4)
    return false;
@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,

      if (StartMask < 0)
        break;
+      // We must stay within the vectors; this case can happen with undefs.
+      if (StartMask + LaneLen > OpNumElts*2)
+        break;
    }

    // Found an interleaved mask of current factor.
@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(

  // Check if the shufflevector is RE-interleave shuffle.
  unsigned Factor;
-  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+  unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
    return false;

  DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@ -2248,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
 /// to use the new results.
 void SelectionDAGISel::UpdateChains(
    SDNode *NodeToMatch, SDValue InputChain,
-    const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+    SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
  SmallVector<SDNode*, 4> NowDeadNodes;

  // Now that all the normal results are replaced, we replace the chain and
@ -2260,6 +2260,11 @@ void SelectionDAGISel::UpdateChains(
    // Replace all the chain results with the final chain we ended up with.
    for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
      SDNode *ChainNode = ChainNodesMatched[i];
+      // If ChainNode is null, it's because we replaced it on a previous
+      // iteration and we cleared it out of ChainNodesMatched. Just skip it.
+      if (!ChainNode)
+        continue;
+
      assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
             "Deleted node left in chain");

@ -2272,6 +2277,11 @@ void SelectionDAGISel::UpdateChains(
      if (ChainVal.getValueType() == MVT::Glue)
        ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
      assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+      SelectionDAG::DAGNodeDeletedListener NDL(
+          *CurDAG, [&](SDNode *N, SDNode *E) {
+            std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+                         static_cast<SDNode *>(nullptr));
+          });
      CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);

      // If the node became dead and we haven't already seen it, delete it.
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@ -142,7 +142,8 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
  if (SegName == "__TEXT" && SecName == "__eh_frame")
    return true;

-  if (SegName == "__DATA" && SecName == "__nl_symbol_ptr")
+  if (SegName == "__DATA" && (SecName == "__nl_symbol_ptr" ||
+                              SecName == "__thread_ptr"))
    return true;

  return false;
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@ -413,6 +413,7 @@ class MipsAsmParser : public MCTargetAsmParser {
    Match_RequiresDifferentOperands,
    Match_RequiresNoZeroRegister,
    Match_RequiresSameSrcAndDst,
+    Match_NoFCCRegisterForCurrentISA,
    Match_NonZeroOperandForSync,
 #define GET_OPERAND_DIAGNOSTIC_TYPES
 #include "MipsGenAsmMatcher.inc"
@ -1461,8 +1462,6 @@ class MipsOperand : public MCParsedAsmOperand {
  bool isFCCAsmReg() const {
    if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
      return false;
-    if (!AsmParser.hasEightFccRegisters())
-      return RegIdx.Index == 0;
    return RegIdx.Index <= 7;
  }
  bool isACCAsmReg() const {
@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
    return Match_RequiresSameSrcAndDst;
  }
 }
+
 unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  switch (Inst.getOpcode()) {
  // As described by the MIPSR6 spec, daui must not use the zero operand for
@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
      return Match_RequiresDifferentOperands;
    return Match_Success;
-  default:
-    return Match_Success;
  }
+
+  uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & MipsII::HasFCCRegOperand) &&
+      (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+    return Match_NoFCCRegisterForCurrentISA;
+
+  return Match_Success;
+
 }

 static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
    return Error(IDLoc, "invalid operand ($zero) for instruction");
  case Match_RequiresSameSrcAndDst:
    return Error(IDLoc, "source and destination must match");
+  case Match_NoFCCRegisterForCurrentISA:
+    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+                 "non-zero fcc register doesn't exist in current ISA level");
  case Match_Immz:
    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
  case Match_UImm1_0:
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@ -123,7 +123,9 @@ namespace MipsII {
    HasForbiddenSlot = 1 << 5,
    /// IsPCRelativeLoad - A Load instruction with implicit source register
    ///                    ($pc) with explicit offset and destination register
-    IsPCRelativeLoad = 1 << 6
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+    HasFCCRegOperand = 1 << 7

  };
 }
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
               SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;

 def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
-                  CEQS_FM_MM<0>;
+                  CEQS_FM_MM<0> {
+  // FIXME: This is required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set used directly.
+  bits<3> fcc = 0;
+}
+
 def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
-                  CEQS_FM_MM<1>;
+                  CEQS_FM_MM<1> {
+  // FIXME: This is required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set used directly.
+  bits<3> fcc = 0;
+}

 def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
              BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in {
    def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>,
                  LW_FM_MM<0x26>;
  }
+
+  multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt,
+                      InstrItinClass itin> {
+    def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+                    C_COND_FM_MM<fmt, 0> {
+      let BaseOpcode = "c.f."#NAME;
+      let isCommutable = 1;
+    }
+    def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 1> {
+      let BaseOpcode = "c.un."#NAME;
+      let isCommutable = 1;
+    }
+    def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 2> {
+      let BaseOpcode = "c.eq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 3> {
+      let BaseOpcode = "c.ueq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 4> {
+      let BaseOpcode = "c.olt."#NAME;
+    }
+    def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 5> {
+      let BaseOpcode = "c.ult."#NAME;
+    }
+    def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 6> {
+      let BaseOpcode = "c.ole."#NAME;
+    }
+    def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+                       C_COND_FM_MM<fmt, 7> {
+      let BaseOpcode = "c.ule."#NAME;
+    }
+    def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 8> {
+      let BaseOpcode = "c.sf."#NAME;
+      let isCommutable = 1;
+    }
+    def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+                       C_COND_FM_MM<fmt, 9> {
+      let BaseOpcode = "c.ngle."#NAME;
+    }
+    def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 10> {
+      let BaseOpcode = "c.seq."#NAME;
+      let isCommutable = 1;
+    }
+    def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 11> {
+      let BaseOpcode = "c.ngl."#NAME;
+    }
+    def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 12> {
+      let BaseOpcode = "c.lt."#NAME;
+    }
+    def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 13> {
+      let BaseOpcode = "c.nge."#NAME;
+    }
+    def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+                     C_COND_FM_MM<fmt, 14> {
+      let BaseOpcode = "c.le."#NAME;
+    }
+    def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+                      C_COND_FM_MM<fmt, 15> {
+      let BaseOpcode = "c.ngt."#NAME;
+    }
+  }
+
+  defm S   : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>,
+             ISA_MIPS1_NOT_32R6_64R6;
+  defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  let DecoderNamespace = "Mips64" in
+  defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm S_MM   : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6;
+  defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+                ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">,
+         ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
 }

 //===----------------------------------------------------------------------===//
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch {
 class CEQS_FM_MM<bits<2> fmt> : MMArch {
  bits<5> fs;
  bits<5> ft;
+  bits<3> fcc;
  bits<4> cond;

  bits<32> Inst;
@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch {
  let Inst{31-26} = 0x15;
  let Inst{25-21} = ft;
  let Inst{20-16} = fs;
-  let Inst{15-13} = 0x0;  // cc
+  let Inst{15-13} = fcc;
  let Inst{12}    = 0;
  let Inst{11-10} = fmt;
  let Inst{9-6}   = cond;
  let Inst{5-0}   = 0x3c;
 }

+class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> {
+  let cond = c;
+}
+
 class BC1F_FM_MM<bits<5> tf> : MMArch {
  bits<16> offset;

--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
  // TODO: implement
 }

+// Emit .dtprelword or .dtpreldword directive
+// and value for debug thread local expression.
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+                                          unsigned Size) const {
+  switch (Size) {
+  case 4:
+    OutStreamer->EmitDTPRel32Value(Value);
+    break;
+  case 8:
+    OutStreamer->EmitDTPRel64Value(Value);
+    break;
+  default:
+    llvm_unreachable("Unexpected size of expression value.");
+  }
+}
+
 // Align all targets of indirect branches on bundle size.  Used only if target
 // is NaCl.
 void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@ -140,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
  void EmitStartOfAsmFile(Module &M) override;
  void EmitEndOfAsmFile(Module &M) override;
  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+  void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
 };
 }

--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
    unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
    emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
    emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
-    emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
-        Mips::FCC0, RegState::ImplicitDefine);
+    emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg)
+                 .addReg(RightReg);
    emitInst(CondMovOpc, ResultReg)
        .addReg(RegWithOne)
        .addReg(Mips::FCC0)
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
  let isTerminator = 1;
  let hasDelaySlot = DelaySlot;
  let Defs = [AT];
+  let hasFCCRegOperand = 1;
 }

 class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
         !strconcat("c.$cond.", typestr)>, HARDFLOAT {
  let Defs = [FCC0];
  let isCodeGenOnly = 1;
+  let hasFCCRegOperand = 1;
 }

+
+// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] to $fcc0. Rather
+//       than duplicating the instruction definition for MIPS1 - MIPS3, we
+//       expand c.cond.ft if necessary, and reject it after constructing the
+//       instruction if the ISA doesn't support it.
 class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC,
                InstrItinClass itin>  :
-   InstSE<(outs), (ins RC:$fs, RC:$ft),
-          !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin,
-          FrmFR>, HARDFLOAT;
+   InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft),
+          !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin,
+          FrmFR>, HARDFLOAT {
+  let isCompare = 1;
+  let hasFCCRegOperand = 1;
+}
+

 multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
                    InstrItinClass itin> {
-  def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>;
-  def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>;
-  def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>;
-  def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>;
-  def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>;
-  def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>;
-  def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>;
-  def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>;
-  def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>;
-  def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>;
-  def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>;
-  def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>;
-  def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>;
-  def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>;
-  def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>;
-  def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
+  def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+                  C_COND_FM<fmt, 0> {
+    let BaseOpcode = "c.f."#NAME;
+    let isCommutable = 1;
+  }
+  def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 1> {
+    let BaseOpcode = "c.un."#NAME;
+    let isCommutable = 1;
+  }
+  def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 2> {
+    let BaseOpcode = "c.eq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 3> {
+    let BaseOpcode = "c.ueq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 4> {
+    let BaseOpcode = "c.olt."#NAME;
+  }
+  def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 5> {
+    let BaseOpcode = "c.ult."#NAME;
+  }
+  def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 6> {
+    let BaseOpcode = "c.ole."#NAME;
+  }
+  def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+                     C_COND_FM<fmt, 7> {
+    let BaseOpcode = "c.ule."#NAME;
+  }
+  def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 8> {
+    let BaseOpcode = "c.sf."#NAME;
+    let isCommutable = 1;
+  }
+  def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+                     C_COND_FM<fmt, 9> {
+    let BaseOpcode = "c.ngle."#NAME;
+  }
+  def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 10> {
+    let BaseOpcode = "c.seq."#NAME;
+    let isCommutable = 1;
+  }
+  def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 11> {
+    let BaseOpcode = "c.ngl."#NAME;
+  }
+  def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 12> {
+    let BaseOpcode = "c.lt."#NAME;
+  }
+  def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 13> {
+    let BaseOpcode = "c.nge."#NAME;
+  }
+  def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+                   C_COND_FM<fmt, 14> {
+    let BaseOpcode = "c.le."#NAME;
+  }
+  def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+                    C_COND_FM<fmt, 15> {
+    let BaseOpcode = "c.ngt."#NAME;
+  }
 }

+let AdditionalPredicates = [NotInMicroMips] in {
 defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
 defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
           FGR_32;
 let DecoderNamespace = "Mips64" in
 defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
           FGR_64;
-
+}
 //===----------------------------------------------------------------------===//
 // Floating Point Instructions
 //===----------------------------------------------------------------------===//
@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
 /// Floating Point Compare
 let AdditionalPredicates = [NotInMicroMips] in {
  def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
-                 ISA_MIPS1_NOT_32R6_64R6;
+                 ISA_MIPS1_NOT_32R6_64R6 {
+
+  // FIXME: This is required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set used directly.
+  bits<3> fcc = 0;
+  }
  def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
-                 ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+                 ISA_MIPS1_NOT_32R6_64R6, FGR_32 {
+  // FIXME: This is required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set used directly.
+  bits<3> fcc = 0;
+  }
 }
 let DecoderNamespace = "Mips64" in
 def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
-               ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+               ISA_MIPS1_NOT_32R6_64R6, FGR_64 {
+  // FIXME: This is required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set used directly.
+  bits<3> fcc = 0;
+}

 //===----------------------------------------------------------------------===//
 // Floating Point Pseudo-Instructions
@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
 //===----------------------------------------------------------------------===//
 // InstAliases.
 //===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
-      ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
-      ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
-      ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
-      ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-
 def : MipsInstAlias
        <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
      ISA_MIPS2, HARDFLOAT;
@ -630,6 +703,80 @@ def : MipsInstAlias
 def : MipsInstAlias
        <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
      FGR_64, ISA_MIPS2, HARDFLOAT;
+
+multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> {
+  def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_F_"#NAME) FCC0,
+                                                       RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_UN_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_EQ_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_UEQ_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_OLT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_ULT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_OLE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_ULE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_SF_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGLE_"#NAME) FCC0,
+                                                          RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_SEQ_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGL_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_LT_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGE_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_LE_"#NAME) FCC0,
+                                                        RC:$fs, RC:$ft), 1>;
+  def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"),
+                      (!cast<Instruction>("C_NGT_"#NAME) FCC0,
+                                                         RC:$fs, RC:$ft), 1>;
+}
+
+multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString,
+                       Instruction BCFalse, string BCFalseString> {
+  def : MipsInstAlias<!strconcat(BCTrueString, " $offset"),
+                                (BCTrue FCC0, brtarget:$offset), 1>;
+
+  def : MipsInstAlias<!strconcat(BCFalseString, " $offset"),
+                                (BCFalse FCC0, brtarget:$offset), 1>;
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+  defm S   : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6;
+  defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+  defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+             ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+  defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6,
+         HARDFLOAT;
+  defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6,
+         HARDFLOAT;
+}
 //===----------------------------------------------------------------------===//
 // Floating Point Patterns
 //===----------------------------------------------------------------------===//
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
  bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
                            // ($pc) and with explicit offset and destination
                            // register
+  bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is
+                            // present in MIPS-I to MIPS-III.

-  // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+  // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h.
  let TSFlags{3-0}   = FormBits;
  let TSFlags{4}     = isCTI;
  let TSFlags{5}     = hasForbiddenSlot;
  let TSFlags{6}     = IsPCRelativeLoad;
+  let TSFlags{7}     = hasFCCRegOperand;

  let DecoderNamespace = "Mips";

@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch {
 class CEQS_FM<bits<5> fmt> : StdArch {
  bits<5> fs;
  bits<5> ft;
+  bits<3> fcc;
  bits<4> cond;

  bits<32> Inst;
@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch {
  let Inst{25-21} = fmt;
  let Inst{20-16} = ft;
  let Inst{15-11} = fs;
-  let Inst{10-8} = 0; // cc
+  let Inst{10-8} = fcc;
  let Inst{7-4} = 0x3;
  let Inst{3-0} = cond;
 }
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL,
  // Otherwise, we work the same as ELF.
  return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
 }
+
+const MCExpr *
+MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+  const MCExpr *Expr =
+      MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+  return MCBinaryExpr::createAdd(
+      Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+}
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@ -42,6 +42,8 @@ class MipsTargetMachine;
    MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
                                     const Constant *C,
                                     unsigned &Align) const override;
+    /// Describe a TLS variable address within debug info.
+    const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
  };
 } // end namespace llvm

--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
                      PPC970_DGroup_Single;
 } // hasSideEffects = 0

+def ICBLC  : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>;
+def ICBLQ  : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
 def ICBT  : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
                       "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;

 def : Pat<(int_ppc_dcbt xoaddr:$dst),
          (DCBT 0, xoaddr:$dst)>;
@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
 def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
                     "mftb $RT, $SPR", IIC_SprMFTB>;

+def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR),
+                     "mfpmr $RT, $SPR", IIC_SprMFPMR>;
+
+def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
+                     "mtpmr $SPR, $RT", IIC_SprMTPMR>;
+
+
 // A pseudo-instruction used to implement the read of the 64-bit cycle counter
 // on a 32-bit target.
 let hasSideEffects = 1, usesCustomInserter = 1 in
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@ -118,6 +118,8 @@ def IIC_SprTLBIE     : InstrItinClass;
 def IIC_SprABORT     : InstrItinClass;
 def IIC_SprMSGSYNC   : InstrItinClass;
 def IIC_SprSTOP      : InstrItinClass;
+def IIC_SprMFPMR     : InstrItinClass;
+def IIC_SprMTPMR     : InstrItinClass;

 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
                                  InstrStage<5, [E500_SFX0]>],
                                 [8, 1],
                                 [E500_GPR_Bypass, E500_CR_Bypass]>,
+  InstrItinData<IIC_SprMFPMR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<4, [E500_SFX0]>],
+                                 [7, 1], // Latency = 4, Repeat rate = 4
+                                 [E500_GPR_Bypass, E500_GPR_Bypass]>,
  InstrItinData<IIC_SprMFMSR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
                                  InstrStage<4, [E500_SFX0]>],
                                 [7, 1], // Latency = 4, Repeat rate = 4
@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
                                  InstrStage<1, [E500_SFX0, E500_SFX1]>],
                                 [4, 1], // Latency = 1, Repeat rate = 1
                                 [E500_GPR_Bypass, E500_CR_Bypass]>,
+  InstrItinData<IIC_SprMTPMR,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SFX0]>],
+                                 [4, 1], // Latency = 1, Repeat rate = 1
+                                 [E500_CR_Bypass, E500_GPR_Bypass]>,
  InstrItinData<IIC_SprMFTB,     [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
                                  InstrStage<4, [E500_SFX0]>],
                                 [7, 1], // Latency = 4, Repeat rate = 4
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries<
                                  InstrStage<5, [E5500_CFX_0]>],
                                 [9, 2], // Latency = 5, Repeat rate = 5
                                 [E5500_GPR_Bypass, E5500_CR_Bypass]>,
-  InstrItinData<IIC_SprMFMSR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
-                                  InstrStage<4, [E5500_SFX0]>],
+  InstrItinData<IIC_SprMFPMR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<4, [E5500_CFX_0]>],
                                 [8, 2], // Latency = 4, Repeat rate = 4
                                 [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
  InstrItinData<IIC_SprMFSPR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
                                  InstrStage<1, [E5500_CFX_0]>],
                                 [5], // Latency = 1, Repeat rate = 1
                                 [E5500_GPR_Bypass]>,
+  InstrItinData<IIC_SprMTPMR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<1, [E5500_CFX_0]>],
+                                 [5], // Latency = 1, Repeat rate = 1
+                                 [E5500_GPR_Bypass]>,
  InstrItinData<IIC_SprMFTB,     [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
                                  InstrStage<4, [E5500_CFX_0]>],
                                 [8, 2], // Latency = 4, Repeat rate = 4
                                 [NoBypass, E5500_GPR_Bypass]>,
  InstrItinData<IIC_SprMTSPR,    [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
-                                  InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+                                  InstrStage<1, [E5500_CFX_0]>],
                                 [5], // Latency = 1, Repeat rate = 1
                                 [E5500_GPR_Bypass]>,
  InstrItinData<IIC_FPGeneral,   [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@ -884,6 +884,10 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
  if (!GEPLHS->hasAllConstantIndices())
    return nullptr;

+  // Make sure the pointers have the same type.
+  if (GEPLHS->getType() != RHS->getType())
+    return nullptr;
+
  Value *PtrBase, *Index;
  std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);

--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@ -502,7 +502,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
      !DL.isNonIntegralPointerType(Ty)) {
    if (all_of(LI.users(), [&LI](User *U) {
          auto *SI = dyn_cast<StoreInst>(U);
-          return SI && SI->getPointerOperand() != &LI;
+          return SI && SI->getPointerOperand() != &LI &&
+                 !SI->getPointerOperand()->isSwiftError();
        })) {
      LoadInst *NewLoad = combineLoadToNewType(
          IC, LI,
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@ -1705,7 +1705,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,

    // If this is an exact definition of this function, then we can propagate
    // information about its result into callsites of it.
-    if (F.hasExactDefinition())
+    // Don't touch naked functions. They may contain asm returning a
+    // value we don't see, so we may end up interprocedurally propagating
+    // the return value incorrectly.
+    if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked))
      Solver.AddTrackedFunction(&F);

    // If this function only has direct calls that we can see, we can track its
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
@ -1,86 +0,0 @@
-; RUN: opt < %s -mtriple=aarch64 -interleaved-access -S | FileCheck %s
-
-; CHECK-LABEL: @extract_user_basic(
-; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi(
-; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br label %if.merge
-
-if.merge:
-  %E2 = extractelement <8 x i32> %L, i32 2
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi_no_dom(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E2 = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_wrong_const_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_wrong_const_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 1
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_undef_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_undef_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 undef
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_var_index(
-; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
-define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 %I
-  ret void
-}
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@ -1,393 +0,0 @@
-; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
-; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
-
-; NEON-LABEL: load_factor2:
-; NEON: ld2 { v0.8b, v1.8b }, [x0]
-; NONEON-LABEL: load_factor2:
-; NONEON-NOT: ld2
-define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
-  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
-  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-  %add = add nsw <8 x i8> %strided.v0, %strided.v1
-  ret <8 x i8> %add
-}
-
-; NEON-LABEL: load_factor3:
-; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: load_factor3:
-; NONEON-NOT: ld3
-define <4 x i32> @load_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_factor4:
-; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: load_factor4:
-; NONEON-NOT: ld4
-define <4 x i32> @load_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_factor2:
-; NEON: st2 { v0.8b, v1.8b }, [x0]
-; NONEON-LABEL: store_factor2:
-; NONEON-NOT: st2
-define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
-  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor3:
-; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: store_factor3:
-; NONEON-NOT: st3
-define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor4:
-; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: store_factor4:
-; NONEON-NOT: st4
-define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following cases test that interleaved access of pointer vectors can be
-; matched to ldN/stN instruction.
-
-; NEON-LABEL: load_ptrvec_factor2:
-; NEON: ld2 { v0.2d, v1.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor2:
-; NONEON-NOT: ld2
-define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
-  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
-  ret <2 x i32*> %strided.v0
-}
-
-; NEON-LABEL: load_ptrvec_factor3:
-; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor3:
-; NONEON-NOT: ld3
-define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
-  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
-  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: load_ptrvec_factor4:
-; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
-; NONEON-LABEL: load_ptrvec_factor4:
-; NONEON-NOT: ld4
-define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <8 x i32*>*
-  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
-  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
-  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
-  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor2:
-; NEON: st2 { v0.2d, v1.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor2:
-; NONEON-NOT: st2
-define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor3:
-; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor3:
-; NONEON-NOT: st3
-define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor4:
-; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
-; NONEON-LABEL: store_ptrvec_factor4:
-; NONEON-NOT: st4
-define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
-  %base = bitcast i32* %ptr to <8 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
-  ret void
-}
-
-; Following cases check that shuffle maskes with undef indices can be matched
-; into ldN/stN instruction.
-
-; NEON-LABEL: load_undef_mask_factor2:
-; NEON: ld2 { v0.4s, v1.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor2:
-; NONEON-NOT: ld2
-define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
-  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
-  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor3:
-; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor3:
-; NONEON-NOT: ld3
-define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor4:
-; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: load_undef_mask_factor4:
-; NONEON-NOT: ld4
-define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_undef_mask_factor2:
-; NEON: st2 { v0.4s, v1.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor2:
-; NONEON-NOT: st2
-define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
-  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor3:
-; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor3:
-; NONEON-NOT: st3
-define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor4:
-; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
-; NONEON-LABEL: store_undef_mask_factor4:
-; NONEON-NOT: st4
-define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; Check that we do something sane with illegal types.
-
-; NEON-LABEL: load_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
-; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
-; NEON-NEXT: ret
-; NONEON-LABEL: load_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: ldr s0, [x0]
-; NONEON-NEXT: ldr s1, [x0, #8]
-; NONEON-NEXT: ret
-define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
-  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
-  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  ret <3 x float> %tmp2
-}
-
-; NEON-LABEL: store_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
-; NEON-NEXT: st1 { v0.d }[0], [x0]
-; NEON-NEXT: ret
-; NONEON-LABEL: store_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
-; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
-; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
-; NONEON-NEXT: str x[[RES]], [x0]
-; NONEON-NEXT: ret
-define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
-  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  store <3 x float> %tmp1, <3 x float>* %p, align 16
-  ret void
-}
-
-; NEON-LABEL: load_factor2_with_extract_user:
-; NEON: ld2 { v0.4s, v1.4s }, [x0]
-; NEON: mov w0, v0.s[1]
-; NONEON-LABEL: load_factor2_with_extract_user:
-; NONEON-NOT: ld2
-define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
-  %1 = load <8 x i32>, <8 x i32>* %a, align 8
-  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %3 = extractelement <8 x i32> %1, i32 2
-  ret i32 %3
-}
-
-; NEON-LABEL: store_general_mask_factor4:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefbeg:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefbeg:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefend:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefend:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmid:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefmid:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmulti:
-; NEON: st4 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor4_undefmulti:
-; NONEON-NOT: st4
-define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undef_fail:
-; NEON-NOT: st3
-; NONEON-LABEL: store_general_mask_factor3_undef_fail:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undeflane:
-; NEON: st3 { v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s, v{{[0-9]+}}.{{[0-9]+}}s }, [x0]
-; NONEON-LABEL: store_general_mask_factor3_undeflane:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_negativestart:
-; NEON-NOT: st3
-; NONEON-LABEL: store_general_mask_factor3_negativestart:
-; NONEON-NOT: st3
-define void @store_general_mask_factor3_negativestart(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
--- a/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
+++ b/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
@ -1,86 +0,0 @@
-; RUN: opt < %s -mtriple=arm-eabi -mattr=+neon -interleaved-access -S | FileCheck %s
-
-; CHECK-LABEL: @extract_user_basic(
-; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi(
-; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 0
-; CHECK: extractelement <4 x i32> %[[R]], i64 1
-define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br label %if.merge
-
-if.merge:
-  %E2 = extractelement <8 x i32> %L, i32 2
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_multi_no_dom(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %E1 = extractelement <8 x i32> %L, i32 0
-  br i1 %C, label %if.then, label %if.merge
-
-if.then:
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E2 = extractelement <8 x i32> %L, i32 2
-  br label %if.merge
-
-if.merge:
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_wrong_const_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_wrong_const_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 1
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_undef_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_undef_index(<8 x i32>* %A) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 undef
-  ret void
-}
-
-; CHECK-LABEL: @extract_user_var_index(
-; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
-define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
-entry:
-  %L = load <8 x i32>, <8 x i32>* %A, align 8
-  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %E = extractelement <8 x i32> %L, i32 %I
-  ret void
-}
--- a/test/CodeGen/ARM/arm-interleaved-accesses.ll
+++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll
@ -1,462 +0,0 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
-; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
-
-; NEON-LABEL: load_factor2:
-; NEON: vld2.8 {d16, d17}, [r0]
-; NONEON-LABEL: load_factor2:
-; NONEON-NOT: vld2
-define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
-  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
-  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-  %add = add nsw <8 x i8> %strided.v0, %strided.v1
-  ret <8 x i8> %add
-}
-
-; NEON-LABEL: load_factor3:
-; NEON: vld3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: load_factor3:
-; NONEON-NOT: vld3
-define <2 x i32> @load_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <6 x i32>*
-  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
-  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
-  %add = add nsw <2 x i32> %strided.v2, %strided.v1
-  ret <2 x i32> %add
-}
-
-; NEON-LABEL: load_factor4:
-; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: load_factor4:
-; NONEON-NOT: vld4
-define <4 x i32> @load_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_factor2:
-; NEON: vst2.8 {d16, d17}, [r0]
-; NONEON-LABEL: store_factor2:
-; NONEON-NOT: vst2
-define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
-  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor3:
-; NEON: vst3.32 {d16, d18, d20}, [r0]!
-; NEON: vst3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: store_factor3:
-; NONEON-NOT: vst3.32
-define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_factor4:
-; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: store_factor4:
-; NONEON-NOT: vst4
-define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following cases test that interleaved access of pointer vectors can be
-; matched to ldN/stN instruction.
-
-; NEON-LABEL: load_ptrvec_factor2:
-; NEON: vld2.32 {d16, d17}, [r0]
-; NONEON-LABEL: load_ptrvec_factor2:
-; NONEON-NOT: vld2
-define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
-  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
-  ret <2 x i32*> %strided.v0
-}
-
-; NEON-LABEL: load_ptrvec_factor3:
-; NEON: vld3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: load_ptrvec_factor3:
-; NONEON-NOT: vld3
-define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
-  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
-  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
-  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: load_ptrvec_factor4:
-; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: load_ptrvec_factor4:
-; NONEON-NOT: vld4
-define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
-  %base = bitcast i32** %ptr to <8 x i32*>*
-  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
-  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
-  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
-  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
-  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor2:
-; NEON: vst2.32 {d16, d17}, [r0]
-; NONEON-LABEL: store_ptrvec_factor2:
-; NONEON-NOT: vst2
-define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
-  %base = bitcast i32** %ptr to <4 x i32*>*
-  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor3:
-; NEON: vst3.32 {d16, d17, d18}, [r0]
-; NONEON-LABEL: store_ptrvec_factor3:
-; NONEON-NOT: vst3
-define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
-  %base = bitcast i32** %ptr to <6 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_ptrvec_factor4:
-; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: store_ptrvec_factor4:
-; NONEON-NOT: vst4
-define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
-  %base = bitcast i32* %ptr to <8 x i32*>*
-  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
-  ret void
-}
-
-; Following cases check that shuffle maskes with undef indices can be matched
-; into ldN/stN instruction.
-
-; NEON-LABEL: load_undef_mask_factor2:
-; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: load_undef_mask_factor2:
-; NONEON-NOT: vld2
-define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
-  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
-  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor3:
-; NEON: vld3.32 {d16, d18, d20}, [r0]!
-; NEON: vld3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: load_undef_mask_factor3:
-; NONEON-NOT: vld3
-define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
-  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %add = add nsw <4 x i32> %strided.v2, %strided.v1
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: load_undef_mask_factor4:
-; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: load_undef_mask_factor4:
-; NONEON-NOT: vld4
-define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
-  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
-  %add = add nsw <4 x i32> %strided.v0, %strided.v2
-  ret <4 x i32> %add
-}
-
-; NEON-LABEL: store_undef_mask_factor2:
-; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
-; NONEON-LABEL: store_undef_mask_factor2:
-; NONEON-NOT: vst2
-define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
-  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor3:
-; NEON: vst3.32 {d16, d18, d20}, [r0]!
-; NEON: vst3.32 {d17, d19, d21}, [r0]
-; NONEON-LABEL: store_undef_mask_factor3:
-; NONEON-NOT: vst3
-define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_undef_mask_factor4:
-; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
-; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
-; NONEON-LABEL: store_undef_mask_factor4:
-; NONEON-NOT: vst4
-define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-  %base = bitcast i32* %ptr to <16 x i32>*
-  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
-  ret void
-}
-
-; The following test cases check that address spaces are properly handled
-
-; NEON-LABEL: load_address_space
-; NEON: vld3.32
-; NONEON-LABEL: load_address_space
-; NONEON-NOT: vld3
-define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
- %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
- %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
- store <2 x i32> %interleaved, <2 x i32>* %B
- ret void
-}
-
-; NEON-LABEL: store_address_space
-; NEON: vst2.32
-; NONEON-LABEL: store_address_space
-; NONEON-NOT: vst2
-define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
- %tmp0 = load <2 x i32>, <2 x i32>* %A
- %tmp1 = load <2 x i32>, <2 x i32>* %B
- %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
- store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
- ret void
-}
-
-; Check that we do something sane with illegal types.
-
-; NEON-LABEL: load_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
-; NEON-NEXT: vuzp.32 q8, {{.*}}
-; NEON-NEXT: vmov r0, r1, d16
-; NEON-NEXT: vmov r2, r3, {{.*}}
-; NEON-NEXT: mov pc, lr
-; NONEON-LABEL: load_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
-; NONEON-NEXT: ldr r1, [r0, #8]
-; NONEON-NEXT: mov r0, [[ELT0]]
-; NONEON-NEXT: mov pc, lr
-define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
-  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
-  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  ret <3 x float> %tmp2
-}
-
-; This lowering isn't great, but it's at least correct.
-
-; NEON-LABEL: store_illegal_factor2:
-; NEON: BB#0:
-; NEON-NEXT: vldr d17, [sp]
-; NEON-NEXT: vmov d16, r2, r3
-; NEON-NEXT: vuzp.32 q8, {{.*}}
-; NEON-NEXT: vstr d16, [r0]
-; NEON-NEXT: mov pc, lr
-; NONEON-LABEL: store_illegal_factor2:
-; NONEON: BB#0:
-; NONEON-NEXT: stm r0, {r1, r3}
-; NONEON-NEXT: mov pc, lr
-define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
-  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
-  store <3 x float> %tmp1, <3 x float>* %p, align 16
-  ret void
-}
-
-; NEON-LABEL: load_factor2_with_extract_user:
-; NEON: vld2.32 {d16, d17, d18, d19}, [r0:64]
-; NEON: vmov.32 r0, d16[1]
-; NONEON-LABEL: load_factor2_with_extract_user:
-; NONEON-NOT: vld2
-define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
-  %1 = load <8 x i32>, <8 x i32>* %a, align 8
-  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %3 = extractelement <8 x i32> %1, i32 2
-  ret i32 %3
-}
-
-; NEON-LABEL: store_general_mask_factor4:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefbeg:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefbeg:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefbeg(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefend:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefend:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefend(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmid:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefmid:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefmid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor4_undefmulti:
-; NEON: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor4_undefmulti:
-; NONEON-NOT: vst4.32
-define void @store_general_mask_factor4_undefmulti(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <8 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
-  store <8 x i32> %i.vec, <8 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_undefmultimid:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undefmultimid(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undef_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_undef_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undef_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_undeflane:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_undeflane:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_undeflane(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_endstart_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_endstart_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_endstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_endstart_pass:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_endstart_pass:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_endstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_midstart_fail:
-; NEON-NOT: vst3.32
-; NONEON-LABEL: store_general_mask_factor3_midstart_fail:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_midstart_fail(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
-; NEON-LABEL: store_general_mask_factor3_midstart_pass:
-; NEON: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; NONEON-LABEL: store_general_mask_factor3_midstart_pass:
-; NONEON-NOT: vst3.32
-define void @store_general_mask_factor3_midstart_pass(i32* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
-  %base = bitcast i32* %ptr to <12 x i32>*
-  %i.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
-  store <12 x i32> %i.vec, <12 x i32>* %base, align 4
-  ret void
-}
-
--- a/test/CodeGen/SystemZ/pr31710.ll
+++ b/test/CodeGen/SystemZ/pr31710.ll
@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=s390x-redhat-linux | FileCheck %s
+;
+; Triggers a path in SelectionDAG's UpdateChains where a node is deleted
+; and then read again later (PR31710): undefined behavior in release
+; builds, or an assertion failure in builds with assertions enabled.
+
+; CHECK: btldata:
+define void @btldata(i64* %u0, i32** %p0, i32** %p1, i32** %p3, i32** %p5, i32** %p7) { ; Reproducer body: pointer loads interleaved with load/modify/store sequences and two stores to %u0.
+entry:
+  %x0 = load i32*, i32** %p0, align 8, !tbaa !0 ; %x0 is never used afterwards
+  store i64 0, i64* %u0, align 8, !tbaa !4 ; first store to %u0
+  %x1 = load i32*, i32** %p1, align 8, !tbaa !0
+  %x2 = load i32, i32* %x1, align 4, !tbaa !6
+  %x2ext = sext i32 %x2 to i64 ; widened copy, used below as the GEP index
+  store i32 %x2, i32* %x1, align 4, !tbaa !6 ; writes back the value just loaded from %x1
+  %x3 = load i32*, i32** %p3, align 8, !tbaa !0
+  %ptr = getelementptr inbounds i32, i32* %x3, i64 %x2ext
+  %x4 = load i32, i32* %ptr, align 4, !tbaa !6
+  %x4inc = add nsw i32 %x4, 1
+  store i32 %x4inc, i32* %ptr, align 4, !tbaa !6 ; increments the element at %ptr in memory
+  store i64 undef, i64* %u0, align 8, !tbaa !4 ; second store to %u0, this time of undef
+  %x5 = load i32*, i32** %p5, align 8, !tbaa !0
+  %x6 = load i32, i32* %x5, align 4, !tbaa !6
+  store i32 %x6, i32* %x5, align 4, !tbaa !6 ; writes back the value just loaded from %x5
+  %x7 = load i32*, i32** %p7, align 8, !tbaa !0
+  %x8 = load i32, i32* %x7, align 4, !tbaa !6
+  %x8inc = add nsw i32 %x8, 1
+  store i32 %x8inc, i32* %x7, align 4, !tbaa !6 ; increments the i32 that %x7 points to
+  ret void
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"any pointer", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"long", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !2, i64 0}
--- a/test/DebugInfo/Mips/tls.ll
+++ b/test/DebugInfo/Mips/tls.ll
@ -0,0 +1,22 @@
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm < %s | FileCheck %s -check-prefix=CHECK-WORD
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm < %s | FileCheck %s -check-prefix=CHECK-DWORD
+
+@x = thread_local global i32 5, align 4, !dbg !0 ; thread-local i32 initialised to 5, with debug info attached through !0
+
+; CHECK-WORD: .dtprelword x+32768
+; CHECK-DWORD: .dtpreldword x+32768
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = !DIGlobalVariableExpression(var: !1)
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 4.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "tls.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 4.0.0"}
--- a/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
+++ b/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
@ -1,38 +0,0 @@
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
-# XFAIL: *
-0x46 0x2f 0x79 0x32 # CHECK: c.eq.d $fcc1, $f15, $f15
-0x46 0x11 0xc5 0x32 # CHECK: c.eq.s $fcc5, $f24, $f17
-0x46 0x35 0x5c 0x30 # CHECK: c.f.d $fcc4, $f11, $f21
-0x46 0x07 0xf4 0x30 # CHECK: c.f.s $fcc4, $f30, $f7
-0x46 0x21 0x94 0x3e # CHECK: c.le.d $fcc4, $f18, $f1
-0x46 0x04 0xc6 0x3e # CHECK: c.le.s $fcc6, $f24, $f4
-0x46 0x23 0x4b 0x3c # CHECK: c.lt.d $fcc3, $f9, $f3
-0x46 0x0e 0x8a 0x3c # CHECK: c.lt.s $fcc2, $f17, $f14
-0x46 0x30 0xad 0x3d # CHECK: c.nge.d $fcc5, $f21, $f16
-0x46 0x08 0x5b 0x3d # CHECK: c.nge.s $fcc3, $f11, $f8
-0x46 0x17 0xfa 0x3b # CHECK: c.ngl.s $fcc2, $f31, $f23
-0x46 0x17 0x92 0x39 # CHECK: c.ngle.s $fcc2, $f18, $f23
-0x46 0x27 0xc4 0x3f # CHECK: c.ngt.d $fcc4, $f24, $f7
-0x46 0x0d 0x45 0x3f # CHECK: c.ngt.s $fcc5, $f8, $f13
-0x46 0x3f 0x82 0x36 # CHECK: c.ole.d $fcc2, $f16, $f31
-0x46 0x14 0x3b 0x36 # CHECK: c.ole.s $fcc3, $f7, $f20
-0x46 0x3c 0x9c 0x34 # CHECK: c.olt.d $fcc4, $f19, $f28
-0x46 0x07 0xa6 0x34 # CHECK: c.olt.s $fcc6, $f20, $f7
-0x46 0x27 0xfc 0x3a # CHECK: c.seq.d $fcc4, $f31, $f7
-0x46 0x19 0x0f 0x3a # CHECK: c.seq.s $fcc7, $f1, $f25
-0x46 0x39 0x6c 0x33 # CHECK: c.ueq.d $fcc4, $f13, $f25
-0x46 0x1e 0x1e 0x33 # CHECK: c.ueq.s $fcc6, $f3, $f30
-0x46 0x32 0xcf 0x37 # CHECK: c.ule.d $fcc7, $f25, $f18
-0x46 0x1e 0xaf 0x37 # CHECK: c.ule.s $fcc7, $f21, $f30
-0x46 0x31 0x36 0x35 # CHECK: c.ult.d $fcc6, $f6, $f17
-0x46 0x0a 0xc7 0x35 # CHECK: c.ult.s $fcc7, $f24, $f10
-0x46 0x38 0xbe 0x31 # CHECK: c.un.d $fcc6, $f23, $f24
-0x46 0x04 0xf1 0x31 # CHECK: c.un.s $fcc1, $f30, $f4
-0x4e 0x74 0xd4 0xa1 # CHECK: madd.d $f18, $f19, $f26, $f20
-0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
-0x4c 0x32 0xfa 0xa9 # CHECK: msub.d $f10, $f1, $f31, $f18
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
-0x4d 0x33 0x74 0xb1 # CHECK: nmadd.d $f18, $f9, $f14, $f19
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
@ -134,3 +134,12 @@
 0x7c 0x0b 0x66 0x24
 # CHECK: tlbsx 11, 12
 0x7c 0x0b 0x67 0x24
+
+# CHECK: mfpmr 5, 400
+0x7c 0xb0 0x62 0x9c
+# CHECK: mtpmr 400, 6
+0x7c 0xd0 0x63 0x9c
+# CHECK: icblc 0, 0, 8
+0x7c 0x00 0x41 0xcc
+# CHECK: icbtls 0, 0, 9
+0x7c 0x00 0x4b 0xcc
--- a/test/MC/MachO/ARM/no-tls-assert.ll
+++ b/test/MC/MachO/ARM/no-tls-assert.ll
@ -0,0 +1,28 @@
+; RUN: llc -filetype=obj -o - %s | llvm-objdump -section-headers - | FileCheck %s
+; This should not trigger the "Creating regular section after DWARF" assert.
+; CHECK: __text
+; CHECK: __thread_ptr  00000004
+target triple = "thumbv7-apple-ios9.0.0"
+
+@b = external thread_local global i32
+define i32* @func(i32 %a) !dbg !9 { ; Returns the address of the external thread-local global @b; the argument %a is not used.
+  ret i32* @b
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "r.ii", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{i32 1, !"PIC Level", i32 2}
+!9 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 4, type: !10, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null, !12}
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DILocalVariable(name: "a", arg: 1, scope: !9, file: !1, line: 4, type: !12)
+!14 = !DIExpression()
--- a/test/MC/Mips/micromips/valid.s
+++ b/test/MC/Mips/micromips/valid.s
@ -210,6 +210,42 @@ recip.s $f2, $f4            # CHECK: recip.s $f2, $f4       # encoding: [0x54,0x
 recip.d $f2, $f4            # CHECK: recip.d $f2, $f4       # encoding: [0x54,0x44,0x52,0x3b]
 rsqrt.s $f3, $f5            # CHECK: rsqrt.s $f3, $f5       # encoding: [0x54,0x65,0x02,0x3b]
 rsqrt.d $f2, $f4            # CHECK: rsqrt.d $f2, $f4       # encoding: [0x54,0x44,0x42,0x3b]
+c.eq.d   $fcc1, $f14, $f14  # CHECK: c.eq.d   $fcc1, $f14, $f14 # encoding: [0x55,0xce,0x24,0xbc]
+c.eq.s   $fcc5, $f24, $f17  # CHECK: c.eq.s   $fcc5, $f24, $f17 # encoding: [0x56,0x38,0xa0,0xbc]
+c.f.d    $fcc4, $f10, $f20  # CHECK: c.f.d    $fcc4, $f10, $f20 # encoding: [0x56,0x8a,0x84,0x3c]
+c.f.s    $fcc4, $f30, $f7   # CHECK: c.f.s    $fcc4, $f30, $f7  # encoding: [0x54,0xfe,0x80,0x3c]
+c.le.d   $fcc4, $f18, $f0   # CHECK: c.le.d   $fcc4, $f18, $f0  # encoding: [0x54,0x12,0x87,0xbc]
+c.le.s   $fcc6, $f24, $f4   # CHECK: c.le.s   $fcc6, $f24, $f4  # encoding: [0x54,0x98,0xc3,0xbc]
+c.lt.d   $fcc3, $f8, $f2    # CHECK: c.lt.d   $fcc3, $f8, $f2   # encoding: [0x54,0x48,0x67,0x3c]
+c.lt.s   $fcc2, $f17, $f14  # CHECK: c.lt.s   $fcc2, $f17, $f14 # encoding: [0x55,0xd1,0x43,0x3c]
+c.nge.d  $fcc5, $f20, $f16  # CHECK: c.nge.d  $fcc5, $f20, $f16 # encoding: [0x56,0x14,0xa7,0x7c]
+c.nge.s  $fcc3, $f11, $f8   # CHECK: c.nge.s  $fcc3, $f11, $f8  # encoding: [0x55,0x0b,0x63,0x7c]
+c.ngl.s  $fcc2, $f31, $f23  # CHECK: c.ngl.s  $fcc2, $f31, $f23 # encoding: [0x56,0xff,0x42,0xfc]
+c.ngle.s $fcc2, $f18, $f23  # CHECK: c.ngle.s $fcc2, $f18, $f23 # encoding: [0x56,0xf2,0x42,0x7c]
+c.ngl.d  $f28, $f28         # CHECK: c.ngl.d  $f28, $f28        # encoding: [0x57,0x9c,0x06,0xfc]
+c.ngle.d $f0, $f16          # CHECK: c.ngle.d $f0, $f16         # encoding: [0x56,0x00,0x06,0x7c]
+c.ngt.d  $fcc4, $f24, $f6   # CHECK: c.ngt.d  $fcc4, $f24, $f6  # encoding: [0x54,0xd8,0x87,0xfc]
+c.ngt.s  $fcc5, $f8, $f13   # CHECK: c.ngt.s  $fcc5, $f8, $f13  # encoding: [0x55,0xa8,0xa3,0xfc]
+c.ole.d  $fcc2, $f16, $f30  # CHECK: c.ole.d  $fcc2, $f16, $f30 # encoding: [0x57,0xd0,0x45,0xbc]
+c.ole.s  $fcc3, $f7, $f20   # CHECK: c.ole.s  $fcc3, $f7, $f20  # encoding: [0x56,0x87,0x61,0xbc]
+c.olt.d  $fcc4, $f18, $f28  # CHECK: c.olt.d  $fcc4, $f18, $f28 # encoding: [0x57,0x92,0x85,0x3c]
+c.olt.s  $fcc6, $f20, $f7   # CHECK: c.olt.s  $fcc6, $f20, $f7  # encoding: [0x54,0xf4,0xc1,0x3c]
+c.seq.d  $fcc4, $f30, $f6   # CHECK: c.seq.d  $fcc4, $f30, $f6  # encoding: [0x54,0xde,0x86,0xbc]
+c.seq.s  $fcc7, $f1, $f25   # CHECK: c.seq.s  $fcc7, $f1, $f25  # encoding: [0x57,0x21,0xe2,0xbc]
+c.sf.d   $f30, $f0          # CHECK: c.sf.d   $f30, $f0         # encoding: [0x54,0x1e,0x06,0x3c]
+c.sf.s   $f14, $f22         # CHECK: c.sf.s   $f14, $f22        # encoding: [0x56,0xce,0x02,0x3c]
+c.ueq.d  $fcc4, $f12, $f24  # CHECK: c.ueq.d  $fcc4, $f12, $f24 # encoding: [0x57,0x0c,0x84,0xfc]
+c.ueq.s  $fcc6, $f3, $f30   # CHECK: c.ueq.s  $fcc6, $f3, $f30  # encoding: [0x57,0xc3,0xc0,0xfc]
+c.ule.d  $fcc7, $f24, $f18  # CHECK: c.ule.d  $fcc7, $f24, $f18 # encoding: [0x56,0x58,0xe5,0xfc]
+c.ule.s  $fcc7, $f21, $f30  # CHECK: c.ule.s  $fcc7, $f21, $f30 # encoding: [0x57,0xd5,0xe1,0xfc]
+c.ult.d  $fcc6, $f6, $f16   # CHECK: c.ult.d  $fcc6, $f6, $f16  # encoding: [0x56,0x06,0xc5,0x7c]
+c.ult.s  $fcc7, $f24, $f10  # CHECK: c.ult.s  $fcc7, $f24, $f10 # encoding: [0x55,0x58,0xe1,0x7c]
+c.un.d   $fcc6, $f22, $f24  # CHECK: c.un.d   $fcc6, $f22, $f24 # encoding: [0x57,0x16,0xc4,0x7c]
+c.un.s   $fcc1, $f30, $f4   # CHECK: c.un.s   $fcc1, $f30, $f4  # encoding: [0x54,0x9e,0x20,0x7c]
+bc1t 8                      # CHECK: bc1t 8                     # encoding: [0x43,0xa0,0x00,0x04]
+bc1f 16                     # CHECK: bc1f 16                    # encoding: [0x43,0x80,0x00,0x08]
+bc1t $fcc1, 4               # CHECK: bc1t $fcc1, 4              # encoding: [0x43,0xa0,0x00,0x02]
+bc1f $fcc2, -20             # CHECK: bc1f $fcc2, -20            # encoding: [0x43,0x80,0xff,0xf6]
 sync                        # CHECK: sync                   # encoding: [0x00,0x00,0x6b,0x7c]
 sync 0                      # CHECK: sync 0                 # encoding: [0x00,0x00,0x6b,0x7c]
 sync 1                      # CHECK: sync 1                 # encoding: [0x00,0x01,0x6b,0x7c]
--- a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
@ -6,8 +6,8 @@
 # RUN: FileCheck %s < %t1

 	.set noat
-        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ldc2      $8,-21181($at)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 16-bit signed offset
        ldc2      $20,-1024($s2)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 16-bit signed offset
        ldl       $24,-4167($24)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
--- a/test/MC/Mips/mips1/invalid-mips4.s
+++ b/test/MC/Mips/mips1/invalid-mips4.s
@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -53,19 +53,19 @@
        ldxc1     $f8,$s7($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f26,$f20,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$9        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
--- a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
@ -44,3 +44,35 @@
        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.s    $fcc1, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
--- a/test/MC/Mips/mips1/invalid-mips5.s
+++ b/test/MC/Mips/mips1/invalid-mips5.s
@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ceil.l.d  $f1,$f3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.l.s  $f18,$f13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.w.d  $f11,$f25         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -52,19 +52,19 @@
        luxc1     $f19,$s6($s5)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwxc1     $f12,$s1($s8)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$8,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f10,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f10,$fcc5    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$a3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$a3     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
--- a/test/MC/Mips/mips2/invalid-mips32.s
+++ b/test/MC/Mips/mips2/invalid-mips32.s
@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        clo       $11,$a1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        clz       $sp,$gp         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        deret                     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -20,19 +20,19 @@
        maddu     $24,$s2         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        mfc0      $a2,$14,1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -41,3 +41,35 @@
        mtc0      $9,$29,3        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        mul       $s0,$s4,$at     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        sync      1               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: s-type must be zero or unspecified for pre-MIPS32 ISAs
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
--- a/test/MC/Mips/mips2/invalid-mips32r2.s
+++ b/test/MC/Mips/mips2/invalid-mips32r2.s
@ -5,12 +5,44 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        clo     $t3,$a1             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        clz     $sp,$gp             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        cvt.l.d $f24,$f15           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        cvt.l.s $f11,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        deret                       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -29,19 +61,19 @@
        mfc0    $a2,$14,1           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        mfhc1   $s8,$f24            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf    $gp,$8,$fcc0        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf    $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf    $gp,$8,$fcc7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d  $f6,$f11,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d  $f6,$f11,$fcc5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s  $f23,$f5,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s  $f23,$f5,$fcc6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn    $v1,$s1,$s0         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d  $f27,$f21,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s  $f12,$f0,$s7        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt    $zero,$s4,$fcc0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt    $zero,$s4,$fcc5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d  $f0,$f2,$fcc0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s  $f30,$f2,$fcc0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s  $f30,$f2,$fcc1      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz    $a1,$s6,$t1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d  $f12,$f29,$t1       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s  $f25,$f7,$v1        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
--- a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
@ -6,8 +6,8 @@
 # RUN: FileCheck %s < %t1

 	.set noat
-        bc1fl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1fl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1tl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        scd       $15,-8243($sp)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected memory with 9-bit signed offset
        sdl       $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
        sdr       $11,-20423($12) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
--- a/test/MC/Mips/mips2/invalid-mips4.s
+++ b/test/MC/Mips/mips2/invalid-mips4.s
@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -50,19 +50,19 @@
        lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwu       $s3,-24086($v1) # CHECK: :[[@LINE]]:23: error: expected memory with 12-bit signed offset
        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
--- a/test/MC/Mips/mips2/invalid-mips5.s
+++ b/test/MC/Mips/mips2/invalid-mips5.s
@ -5,8 +5,8 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ceil.l.d  $f1,$f3         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        ceil.l.s  $f18,$f13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        cvt.d.l   $f4,$f16        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -48,19 +48,19 @@
        luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$a0,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$a0,$fcc7   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$a1     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$a1   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@ -71,3 +71,36 @@
        sdxc1     $f11,$a2($t2)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        suxc1     $f12,$k1($t1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        swxc1     $f19,$t0($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+
--- a/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
@ -1,10 +0,0 @@
-# Instructions that are invalid and are correctly rejected but use the wrong
-# error message at the moment.
-#
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
-# RUN:     2>%t1
-# RUN: FileCheck %s < %t1
-
-	.set noat
-        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
--- a/test/MC/Mips/mips3/invalid-mips4.s
+++ b/test/MC/Mips/mips3/invalid-mips4.s
@ -5,26 +5,58 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ldxc1     $f8,$s7($15)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$9    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        sdxc1     $f11,$10($14)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        swxc1     $f19,$12($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
--- a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
@ -44,3 +44,4 @@
        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+
--- a/test/MC/Mips/mips3/invalid-mips5.s
+++ b/test/MC/Mips/mips3/invalid-mips5.s
@ -5,28 +5,61 @@
 # RUN: FileCheck %s < %t1

        .set noat
-        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1f      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        bc1t      $fcc1, 4        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
        ldxc1     $f8,$s7($t3)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        luxc1     $f19,$s6($s5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        lwxc1     $f12,$s1($s8)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf      $gp,$8,$fcc0    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf      $gp,$8,$fcc7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.d    $f6,$f11,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.d    $f6,$f11,$fcc5  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movf.s    $f23,$f5,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movf.s    $f23,$f5,$fcc6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn      $v1,$s1,$s0     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.d    $f27,$f21,$k0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movn.s    $f12,$f0,$s7    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt      $zero,$s4,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt      $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.d    $f0,$f2,$fcc0   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movt.s    $f30,$f2,$fcc0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        movt.s    $f30,$f2,$fcc1  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz      $a1,$s6,$a5     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.d    $f12,$f29,$a5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        movz.s    $f25,$f7,$v1    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        sdxc1     $f11,$a6($t2)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        suxc1     $f12,$k1($t1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
        swxc1     $f19,$t0($k0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        c.eq.s    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.s     $fcc4, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.s    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.s    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.s   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.s   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.s  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.s   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.s   $fcc3, $f7, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.s   $fcc6, $f2, $f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.s   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.s    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.s   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.s   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.s    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.eq.d    $fcc1, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.f.d     $fcc4, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.le.d    $fcc6, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.lt.d    $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.nge.d   $fcc3, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngl.d   $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngle.d  $fcc2, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ngt.d   $fcc5, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ole.d   $fcc3, $f8, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.olt.d   $fcc6, $f2, $f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.seq.d   $fcc7, $f1, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.sf.d    $fcc4, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ueq.d   $fcc6, $f3, $f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ule.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.ult.d   $fcc7, $f2, $f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+        c.un.d    $fcc1, $f2, $f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: non-zero fcc register doesn't exist in current ISA level
+
--- a/test/MC/Mips/mips32/valid-xfail.s
+++ b/test/MC/Mips/mips32/valid-xfail.s
@ -7,32 +7,20 @@
 # XFAIL: *

        .set noat
-        c.eq.d          $fcc1,$f15,$f15
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
-        c.nge.s         $fcc3,$f11,$f8
-        c.ngl.s         $fcc2,$f31,$f23
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
-        c.seq.s         $fcc7,$f1,$f25
-        c.ueq.d         $fcc4,$f13,$f25
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
-        c.un.s          $fcc1,$f30,$f4
+        c.eq.ps         $fcc5,$f0,$f9
+        c.f.ps          $fcc6,$f11,$f11
+        c.le.ps         $fcc1,$f7,$f20
+        c.lt.ps         $f19,$f5
+        c.nge.ps        $f1,$f26
+        c.ngl.ps        $f21,$f30
+        c.ngle.ps       $fcc7,$f12,$f20
+        c.ngt.ps        $fcc5,$f30,$f6
+        c.ole.ps        $fcc7,$f21,$f8
+        c.olt.ps        $fcc3,$f7,$f16
+        c.seq.ps        $fcc6,$f31,$f14
+        c.sf.ps         $fcc6,$f4,$f6
+        c.ueq.ps        $fcc1,$f5,$f29
+        c.ule.ps        $fcc6,$f17,$f3
+        c.ult.ps        $fcc7,$f14,$f0
+        c.un.ps         $fcc4,$f2,$f26
        rorv            $13,$a3,$s5
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.w.d  $f11,$f25
        ceil.w.s  $f6,$f20
        cfc1      $s1,$21
--- a/test/MC/Mips/mips32r2/valid-xfail.s
+++ b/test/MC/Mips/mips32r2/valid-xfail.s
@ -12,50 +12,22 @@
        addqh.w         $s7,$s7,$k1
        addqh_r.w       $8,$v1,$zero
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        ceil.l.d        $f1,$f3
        ceil.l.s        $f18,$f13
        cfcmsa          $s6,$19
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.w.d  $f11,$f25
        ceil.w.s  $f6,$f20
        cfc1      $s1,$21
--- a/test/MC/Mips/mips32r3/valid-xfail.s
+++ b/test/MC/Mips/mips32r3/valid-xfail.s
@ -12,50 +12,22 @@
        addqh.w         $s7,$s7,$k1
        addqh_r.w       $8,$v1,$zero
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        ceil.l.d        $f1,$f3
        ceil.l.s        $f18,$f13
        cfcmsa          $s6,$19
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.w.d  $f11,$f25
        ceil.w.s  $f6,$f20
        cfc1      $s1,$21
--- a/test/MC/Mips/mips32r5/valid-xfail.s
+++ b/test/MC/Mips/mips32r5/valid-xfail.s
@ -12,50 +12,22 @@
        addqh.w         $s7,$s7,$k1
        addqh_r.w       $8,$v1,$zero
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        ceil.l.d        $f1,$f3
        ceil.l.s        $f18,$f13
        cfcmsa          $s6,$19
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.w.d  $f11,$f25
        ceil.w.s  $f6,$f20
        cfc1      $s1,$21
--- a/test/MC/Mips/mips4/valid-xfail.s
+++ b/test/MC/Mips/mips4/valid-xfail.s
@ -7,31 +7,19 @@
 # XFAIL: *

        .set noat
-        c.eq.d          $fcc1,$f15,$f15
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
-        c.nge.s         $fcc3,$f11,$f8
-        c.ngl.s         $fcc2,$f31,$f23
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
-        c.seq.s         $fcc7,$f1,$f25
-        c.ueq.d         $fcc4,$f13,$f25
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
-        c.un.s          $fcc1,$f30,$f4
+        c.eq.ps         $fcc5,$f0,$f9
+        c.f.ps          $fcc6,$f11,$f11
+        c.le.ps         $fcc1,$f7,$f20
+        c.lt.ps         $f19,$f5
+        c.nge.ps        $f1,$f26
+        c.ngl.ps        $f21,$f30
+        c.ngle.ps       $fcc7,$f12,$f20
+        c.ngt.ps        $fcc5,$f30,$f6
+        c.ole.ps        $fcc7,$f21,$f8
+        c.olt.ps        $fcc3,$f7,$f16
+        c.seq.ps        $fcc6,$f31,$f14
+        c.sf.ps         $fcc6,$f4,$f6
+        c.ueq.ps        $fcc1,$f5,$f29
+        c.ule.ps        $fcc6,$f17,$f3
+        c.ult.ps        $fcc7,$f14,$f0
+        c.un.ps         $fcc4,$f2,$f26
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/Mips/mips5/valid-xfail.s
+++ b/test/MC/Mips/mips5/valid-xfail.s
@ -10,50 +10,22 @@
        abs.ps          $f22,$f8
        add.ps          $f25,$f27,$f13
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        cvt.ps.s        $f3,$f18,$f19
        cvt.s.pl        $f30,$f1
        cvt.s.pu        $f14,$f25
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/Mips/mips64/valid-xfail.s
+++ b/test/MC/Mips/mips64/valid-xfail.s
@ -13,50 +13,22 @@
        alnv.ob         $v31,$v23,$v30,$at
        alnv.ob         $v8,$v17,$v30,$a1
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        cvt.ps.s        $f3,$f18,$f19
        cvt.s.pl        $f30,$f1
        cvt.s.pu        $f14,$f25
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/Mips/mips64r2/valid-xfail.s
+++ b/test/MC/Mips/mips64r2/valid-xfail.s
@ -12,50 +12,22 @@
        addqh.w         $s7,$s7,$k1
        addqh_r.w       $8,$v1,$zero
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        cvt.ps.s        $f3,$f18,$f19
        cvt.s.pl        $f30,$f1
        cvt.s.pu        $f14,$f25
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/Mips/mips64r3/valid-xfail.s
+++ b/test/MC/Mips/mips64r3/valid-xfail.s
@ -15,50 +15,22 @@
        alnv.ob         $v31,$v23,$v30,$at
        alnv.ob         $v8,$v17,$v30,$a1
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        cvt.ps.s        $f3,$f18,$f19
        cvt.s.pl        $f30,$f1
        cvt.s.pu        $f14,$f25
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/Mips/mips64r5/valid-xfail.s
+++ b/test/MC/Mips/mips64r5/valid-xfail.s
@ -15,50 +15,22 @@
        alnv.ob         $v31,$v23,$v30,$at
        alnv.ob         $v8,$v17,$v30,$a1
        alnv.ps         $f12,$f18,$f30,$12
-        c.eq.d          $fcc1,$f15,$f15
        c.eq.ps         $fcc5,$f0,$f9
-        c.eq.s          $fcc5,$f24,$f17
-        c.f.d           $fcc4,$f11,$f21
        c.f.ps          $fcc6,$f11,$f11
-        c.f.s           $fcc4,$f30,$f7
-        c.le.d          $fcc4,$f18,$f1
        c.le.ps         $fcc1,$f7,$f20
-        c.le.s          $fcc6,$f24,$f4
-        c.lt.d          $fcc3,$f9,$f3
        c.lt.ps         $f19,$f5
-        c.lt.s          $fcc2,$f17,$f14
-        c.nge.d         $fcc5,$f21,$f16
        c.nge.ps        $f1,$f26
-        c.nge.s         $fcc3,$f11,$f8
        c.ngl.ps        $f21,$f30
-        c.ngl.s         $fcc2,$f31,$f23
        c.ngle.ps       $fcc7,$f12,$f20
-        c.ngle.s        $fcc2,$f18,$f23
-        c.ngt.d         $fcc4,$f24,$f7
        c.ngt.ps        $fcc5,$f30,$f6
-        c.ngt.s         $fcc5,$f8,$f13
-        c.ole.d         $fcc2,$f16,$f31
        c.ole.ps        $fcc7,$f21,$f8
-        c.ole.s         $fcc3,$f7,$f20
-        c.olt.d         $fcc4,$f19,$f28
        c.olt.ps        $fcc3,$f7,$f16
-        c.olt.s         $fcc6,$f20,$f7
-        c.seq.d         $fcc4,$f31,$f7
        c.seq.ps        $fcc6,$f31,$f14
-        c.seq.s         $fcc7,$f1,$f25
        c.sf.ps         $fcc6,$f4,$f6
-        c.ueq.d         $fcc4,$f13,$f25
        c.ueq.ps        $fcc1,$f5,$f29
-        c.ueq.s         $fcc6,$f3,$f30
-        c.ule.d         $fcc7,$f25,$f18
        c.ule.ps        $fcc6,$f17,$f3
-        c.ule.s         $fcc7,$f21,$f30
-        c.ult.d         $fcc6,$f6,$f17
        c.ult.ps        $fcc7,$f14,$f0
-        c.ult.s         $fcc7,$f24,$f10
-        c.un.d          $fcc6,$f23,$f24
        c.un.ps         $fcc4,$f2,$f26
-        c.un.s          $fcc1,$f30,$f4
        cvt.ps.s        $f3,$f18,$f19
        cvt.s.pl        $f30,$f1
        cvt.s.pu        $f14,$f25
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@ -41,10 +41,38 @@ a:
        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
        cache     1, 8($5)             # CHECK: cache 1, 8($5)       # encoding: [0xbc,0xa1,0x00,0x08]
-        c.ngl.d   $f29,$f29
-        c.ngle.d  $f0,$f16
-        c.sf.d    $f30,$f0
-        c.sf.s    $f14,$f22
+        c.eq.d    $fcc1, $f14, $f14    # CHECK: c.eq.d    $fcc1, $f14, $f14       # encoding: [0x46,0x2e,0x71,0x32]
+        c.eq.s    $fcc5, $f24, $f17    # CHECK: c.eq.s    $fcc5, $f24, $f17       # encoding: [0x46,0x11,0xc5,0x32]
+        c.f.d     $fcc4, $f10, $f20    # CHECK: c.f.d     $fcc4, $f10, $f20       # encoding: [0x46,0x34,0x54,0x30]
+        c.f.s     $fcc4, $f30, $f7     # CHECK: c.f.s     $fcc4, $f30, $f7        # encoding: [0x46,0x07,0xf4,0x30]
+        c.le.d    $fcc4, $f18, $f0     # CHECK: c.le.d    $fcc4, $f18, $f0        # encoding: [0x46,0x20,0x94,0x3e]
+        c.le.s    $fcc6, $f24, $f4     # CHECK: c.le.s    $fcc6, $f24, $f4        # encoding: [0x46,0x04,0xc6,0x3e]
+        c.lt.d    $fcc3, $f8, $f2      # CHECK: c.lt.d    $fcc3, $f8, $f2         # encoding: [0x46,0x22,0x43,0x3c]
+        c.lt.s    $fcc2, $f17, $f14    # CHECK: c.lt.s    $fcc2, $f17, $f14       # encoding: [0x46,0x0e,0x8a,0x3c]
+        c.nge.d   $fcc5, $f20, $f16    # CHECK: c.nge.d   $fcc5, $f20, $f16       # encoding: [0x46,0x30,0xa5,0x3d]
+        c.nge.s   $fcc3, $f11, $f8     # CHECK: c.nge.s   $fcc3, $f11, $f8        # encoding: [0x46,0x08,0x5b,0x3d]
+        c.ngl.s   $fcc2, $f31, $f23    # CHECK: c.ngl.s   $fcc2, $f31, $f23       # encoding: [0x46,0x17,0xfa,0x3b]
+        c.ngle.s  $fcc2, $f18, $f23    # CHECK: c.ngle.s  $fcc2, $f18, $f23       # encoding: [0x46,0x17,0x92,0x39]
+        c.ngl.d   $f28, $f28           # CHECK: c.ngl.d   $f28, $f28              # encoding: [0x46,0x3c,0xe0,0x3b]
+        c.ngle.d  $f0, $f16            # CHECK: c.ngle.d  $f0, $f16               # encoding: [0x46,0x30,0x00,0x39]
+        c.ngt.d   $fcc4, $f24, $f6     # CHECK: c.ngt.d   $fcc4, $f24, $f6        # encoding: [0x46,0x26,0xc4,0x3f]
+        c.ngt.s   $fcc5, $f8, $f13     # CHECK: c.ngt.s   $fcc5, $f8, $f13        # encoding: [0x46,0x0d,0x45,0x3f]
+        c.ole.d   $fcc2, $f16, $f30    # CHECK: c.ole.d   $fcc2, $f16, $f30       # encoding: [0x46,0x3e,0x82,0x36]
+        c.ole.s   $fcc3, $f7, $f20     # CHECK: c.ole.s   $fcc3, $f7, $f20        # encoding: [0x46,0x14,0x3b,0x36]
+        c.olt.d   $fcc4, $f18, $f28    # CHECK: c.olt.d   $fcc4, $f18, $f28       # encoding: [0x46,0x3c,0x94,0x34]
+        c.olt.s   $fcc6, $f20, $f7     # CHECK: c.olt.s   $fcc6, $f20, $f7        # encoding: [0x46,0x07,0xa6,0x34]
+        c.seq.d   $fcc4, $f30, $f6     # CHECK: c.seq.d   $fcc4, $f30, $f6        # encoding: [0x46,0x26,0xf4,0x3a]
+        c.seq.s   $fcc7, $f1, $f25     # CHECK: c.seq.s   $fcc7, $f1, $f25        # encoding: [0x46,0x19,0x0f,0x3a]
+        c.sf.d    $f30, $f0            # CHECK: c.sf.d    $f30, $f0               # encoding: [0x46,0x20,0xf0,0x38]
+        c.sf.s    $f14, $f22           # CHECK: c.sf.s    $f14, $f22              # encoding: [0x46,0x16,0x70,0x38]
+        c.ueq.d   $fcc4, $f12, $f24    # CHECK: c.ueq.d   $fcc4, $f12, $f24       # encoding: [0x46,0x38,0x64,0x33]
+        c.ueq.s   $fcc6, $f3, $f30     # CHECK: c.ueq.s   $fcc6, $f3, $f30        # encoding: [0x46,0x1e,0x1e,0x33]
+        c.ule.d   $fcc7, $f24, $f18    # CHECK: c.ule.d   $fcc7, $f24, $f18       # encoding: [0x46,0x32,0xc7,0x37]
+        c.ule.s   $fcc7, $f21, $f30    # CHECK: c.ule.s   $fcc7, $f21, $f30       # encoding: [0x46,0x1e,0xaf,0x37]
+        c.ult.d   $fcc6, $f6, $f16     # CHECK: c.ult.d   $fcc6, $f6, $f16        # encoding: [0x46,0x30,0x36,0x35]
+        c.ult.s   $fcc7, $f24, $f10    # CHECK: c.ult.s   $fcc7, $f24, $f10       # encoding: [0x46,0x0a,0xc7,0x35]
+        c.un.d    $fcc6, $f22, $f24    # CHECK: c.un.d    $fcc6, $f22, $f24       # encoding: [0x46,0x38,0xb6,0x31]
+        c.un.s    $fcc1, $f30, $f4     # CHECK: c.un.s    $fcc1, $f30, $f4        # encoding: [0x46,0x04,0xf1,0x31]
        ceil.l.d  $f1,$f3
        ceil.l.s  $f18,$f13
        ceil.w.d  $f11,$f25
--- a/test/MC/PowerPC/ppc64-encoding-bookIII.s
+++ b/test/MC/PowerPC/ppc64-encoding-bookIII.s
@ -197,3 +197,16 @@
 # CHECK-BE: tlbsx 11, 12                    # encoding: [0x7c,0x0b,0x67,0x24]
 # CHECK-LE: tlbsx 11, 12                    # encoding: [0x24,0x67,0x0b,0x7c]
            tlbsx %r11, %r12
+
+# CHECK-BE: mfpmr 5, 400                    # encoding: [0x7c,0xb0,0x62,0x9c]
+# CHECK-LE: mfpmr 5, 400                    # encoding: [0x9c,0x62,0xb0,0x7c]
+            mfpmr 5, 400
+# CHECK-BE: mtpmr 400, 6                    # encoding: [0x7c,0xd0,0x63,0x9c]
+# CHECK-LE: mtpmr 400, 6                    # encoding: [0x9c,0x63,0xd0,0x7c]
+            mtpmr 400, 6
+# CHECK-BE: icblc 0, 0, 8                      # encoding: [0x7c,0x00,0x41,0xcc]
+# CHECK-LE: icblc 0, 0, 8                      # encoding: [0xcc,0x41,0x00,0x7c]
+            icblc 0, 0, 8
+# CHECK-BE: icbtls 0, 0, 9                     # encoding: [0x7c,0x00,0x4b,0xcc]
+# CHECK-LE: icbtls 0, 0, 9                     # encoding: [0xcc,0x4b,0x00,0x7c]
+            icbtls 0, 0, 9
--- a/test/Transforms/IPConstantProp/naked-return.ll
+++ b/test/Transforms/IPConstantProp/naked-return.ll
@ -0,0 +1,28 @@
+; RUN: opt -ipsccp -S %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc19.0.24215"
+
+define i32 @dipsy(i32, i32) local_unnamed_addr #0 { ; #0 = naked noinline optnone
+BasicBlock0:
+  call void asm "\0D\0Apushl %ebp\0D\0Amovl 8(%esp),%eax\0D\0Amovl 12(%esp), %ebp\0D\0Acalll *%eax\0D\0Apopl %ebp\0D\0Aretl\0D\0A", ""()
+  ret i32 0 ; the zero that must not be propagated into @patatino
+}
+
+define void @tinkywinky(i32, i32, i32) local_unnamed_addr #0 { ; #0 = naked noinline optnone
+BasicBlock1:
+  call void asm "\0D\0A    movl 12(%esp), %ebp\0D\0A    movl 4(%esp), %eax\0D\0A    movl 8(%esp), %esp\0D\0A    jmpl *%eax\0D\0A", ""()
+  ret void
+}
+
+define void @patatino(i32, i32, i32) local_unnamed_addr #1 {
+bb:
+  %3 = tail call i32 @dipsy(i32 %0, i32 %1) #0
+; Check that the "ret i32 0" of naked @dipsy is not propagated: %3 must still be passed to @tinkywinky.
+; CHECK: @tinkywinky(i32 %3, i32 %2, i32 %1) #0
+  tail call void @tinkywinky(i32 %3, i32 %2, i32 %1) #0
+  ret void
+}
+
+attributes #0 = { naked noinline optnone }
+attributes #1 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
--- a/test/Transforms/InstCombine/indexed-gep-compares.ll
+++ b/test/Transforms/InstCombine/indexed-gep-compares.ll
@ -188,3 +188,20 @@ bb10:


 declare i32 @__gxx_personality_v0(...)
+
+define i1 @test8(i64* %in, i64 %offset) {
+entry:
+; Compares an i32** GEP (index 1) with a bitcast i8 GEP (index %offset); both are based on inttoptr of %ld.
+ %ld = load i64, i64* %in, align 8
+ %casti8 = inttoptr i64 %ld to i8*
+ %gepi8 = getelementptr inbounds i8, i8* %casti8, i64 %offset
+ %cast = bitcast i8* %gepi8 to i32**
+ %ptrcast = inttoptr i64 %ld to i32**
+ %gepi32 = getelementptr inbounds i32*, i32** %ptrcast, i64 1
+ %cmp = icmp eq i32** %gepi32, %cast
+ ret i1 %cmp
+; The output must not contain the compare rewritten as "icmp eq i32 <value>, 1".
+
+; CHECK-LABEL: @test8(
+; CHECK-NOT: icmp eq i32 %{{[0-9A-Za-z.]+}}, 1
+}
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@ -219,3 +219,22 @@ entry:
  store %swift.error* %err.res, %swift.error** %err, align 8
  ret void
 }
+
+; Make sure we preserve the type of the store to a swifterror pointer.
+; CHECK-LABEL: @test19(
+; CHECK: [[A:%.*]] = alloca
+; CHECK: call
+; CHECK: [[BC:%.*]] = bitcast i8** [[A]] to
+; CHECK: [[ERRVAL:%.*]] =  load {{.*}}[[BC]]
+; CHECK: store {{.*}}[[ERRVAL]]
+; CHECK: ret
+declare void @initi8(i8**)
+define void @test19(%swift.error** swifterror %err) {
+entry:
+  %tmp = alloca i8*, align 8 ; the slot is i8*-typed, not %swift.error*-typed
+  call void @initi8(i8** %tmp)
+  %swifterror = bitcast i8** %tmp to %swift.error** ; view the i8* slot as a %swift.error* slot
+  %err.res = load %swift.error*, %swift.error** %swifterror, align 8
+  store %swift.error* %err.res, %swift.error** %err, align 8 ; store into the swifterror argument
+  ret void
+}
--- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user.ll
@ -0,0 +1,113 @@
+; RUN: opt < %s -interleaved-access -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @extract_user_basic(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    ret void
+; Input: a stride-2 shuffle (lanes 0,2,4,6) in entry plus an extract of wide lane 2 in if.then; the expected output reads lane 1 of the first ld2 result instead.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_multi(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    ret void
+; Input: the same stride-2 shuffle with two extracts of the wide vector (lane 0 in if.then, lane 2 in if.merge); both are expected to read lanes 0 and 1 of the first ld2 result.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br label %if.merge
+
+if.merge:
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  ret void
+}
+
+define void @extract_user_multi_no_dom(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+; Input: %e0 is extracted in entry while the shuffle only appears in if.then; no @llvm.aarch64.neon intrinsic is expected (presumably because the shuffle does not dominate %e0 - confirm against the pass).
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_wrong_const_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+; Input: the extract reads wide lane 1, which is not one of the shuffle's lanes 0,2,4,6; no @llvm.aarch64.neon intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 1
+  ret void
+}
+
+define void @extract_user_undef_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+; Input: the extract uses an undef index; no @llvm.aarch64.neon intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 undef
+  ret void
+}
+
+define void @extract_user_var_index(<8 x i32>* %ptr, i32 %i) {
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT:     @llvm.aarch64.neon
+; CHECK:         ret void
+; Input: the extract uses the non-constant index %i; no @llvm.aarch64.neon intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 %i
+  ret void
+}
--- a/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
@ -0,0 +1,567 @@
+; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NEON
+; RUN: opt < %s -mattr=-neon -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @load_factor2(<16 x i8>* %ptr) {
+; NEON-LABEL:    @load_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to <8 x i8>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: a <16 x i8> load split by two stride-2 shuffles (even lanes and odd lanes).
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
+
+define void @load_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: a <12 x i32> load split by three stride-3 shuffles starting at lanes 0, 1 and 2.
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  ret void
+}
+
+define void @load_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: a <16 x i32> load split by four stride-4 shuffles starting at lanes 0 through 3.
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret void
+}
+
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+; NEON-LABEL:    @store_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <16 x i8>* %ptr to <8 x i8>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v8i8.p0v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8>* [[TMP3]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: %v0 and %v1 interleaved lane by lane (0,8,1,9,...) by a single shuffle, then stored as <16 x i8>.
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_factor3(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: %v0/%v1 are concatenated into %s0 and %v2 is widened with undef lanes into %s1; a 3-way interleaving shuffle of %s0/%s1 is then stored.
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_factor4(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: %v0/%v1 are concatenated into %s0 and %v2/%v3 into %s1; a 4-way interleaving shuffle of %s0/%s1 is then stored.
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-2 de-interleaving of a vector of i32* elements; the expected output loads <2 x i64> pieces and converts each back with inttoptr.
+  %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
+  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  %v1 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 1, i32 3>
+  ret void
+}
+
+define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-3 de-interleaving of a vector of i32* elements; the expected output loads <2 x i64> pieces and converts each back with inttoptr.
+  %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
+  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT:       [[TMP9:%.*]] = inttoptr <2 x i64> [[TMP8]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-4 de-interleaving of a vector of i32* elements; the expected output loads <2 x i64> pieces and converts each back with inttoptr.
+  %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
+  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 2, i32 6>
+  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  ret void
+}
+
+define void @store_ptrvec_factor2(<4 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+; NEON-LABEL:    @store_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = ptrtoint <2 x i32*> %v0 to <2 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <2 x i32*> %v1 to <2 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <4 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-2 interleaving store of i32* elements; the expected output first converts the shuffle operands with ptrtoint.
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+; NEON-LABEL:    @store_ptrvec_factor3(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP6:%.*]] = bitcast <6 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-3 interleaving store of i32* elements built from %s0/%s1; the expected output first converts %s0 and %s1 with ptrtoint.
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor4(<8 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+; NEON-LABEL:    @store_ptrvec_factor4(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i64>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i64>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 6, i32 7>
+; NEON-NEXT:       [[TMP7:%.*]] = bitcast <8 x i32*>* %ptr to <2 x i64>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i64.p0v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-4 interleaving store of i32* elements built from %s0/%s1; the expected output first converts %s0 and %s1 with ptrtoint.
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-2 de-interleaving shuffles whose masks are undef in positions 0 and 2.
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  ret void
+}
+
+define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-3 de-interleaving shuffles; only the mask of %v2 contains undef (its last three positions).
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  ret void
+}
+
+define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP1]])
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-4 de-interleaving shuffles whose masks are all undef in their last two positions.
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  ret void
+}
+
+define void @store_undef_mask_factor2(<8 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; NEON-LABEL:    @store_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <8 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-2 interleaving shuffle whose first four mask positions are undef.
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_undef_mask_factor3(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-3 interleaving shuffle of %s0/%s1 whose mask is undef in positions 2 and 4 (counting from 0).
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_undef_mask_factor4(
+; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <16 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-4 interleaving shuffle of %s0/%s1 whose mask is undef in positions 3 and 4 (counting from 0).
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
+; NEON-LABEL:    @load_illegal_factor2(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @load_illegal_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: a <3 x float> load with one shuffle using mask <0, 2, undef>; no @llvm.aarch64.neon intrinsic is expected under either RUN line.
+  %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
+  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret void
+}
+
+define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind {
+; NEON-LABEL:    @store_illegal_factor2(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_illegal_factor2(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: a <3 x float> store of a shuffle using mask <0, 2, undef>; no @llvm.aarch64.neon intrinsic is expected under either RUN line.
+  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 16
+  ret void
+}
+
+define void @store_general_mask_factor4(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Input: factor-4 interleaving shuffle whose four 2-element groups start at source lanes 4, 16, 32 and 8 of %v0/%v1 rather than at lane 0.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefbeg(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefbeg(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; The first mask element is undef; lane 0 is still expected to be emitted as <4, 5> (inferred from the 5 at mask position 4).
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefend(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefend(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; The last mask element is undef; lane 3 is still expected to be emitted as <8, 9>.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmid(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmid(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Mask positions 1 and 6 are undef; lanes 1 and 2 are still expected as <16, 17> and <32, 33>.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor4_undefmulti(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmulti(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       [[TMP5:%.*]] = bitcast <8 x i32>* %ptr to <2 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Lanes 1 and 2 are entirely undef in the mask; their expected sub-vectors fall back to start index 0 (<0, 1>).
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Factor-3 store whose three lanes start at offsets 4, 32 and 16 of %v0/%v1: expects st3 of <4 x i32> sub-vectors.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undefmultimid(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undefmultimid(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Lane 0 appears as <4, undef, undef, 7> in the mask; st3 is still expected with lane 0 emitted as <4, 5, 6, 7>.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undef_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undef_fail(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Negative test: lane 0 appears as <4, undef, undef, 8>, which is not consecutive (4 + 3 = 7), so no NEON intrinsic is expected.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_undeflane(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undeflane(
+; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       [[TMP4:%.*]] = bitcast <12 x i32>* %ptr to <4 x i32>*
+; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]])
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Lane 0 is entirely undef in the mask; st3 is still expected, with lane 0 emitted from start index 0 (<0, 1, 2, 3>).
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_general_mask_factor3_negativestart(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_negativestart(
+; NEON-NOT:        @llvm.aarch64.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_negativestart(
+; NO_NEON-NOT:     @llvm.aarch64.neon
+; NO_NEON:         ret void
+; Negative test: lane 0 appears as <undef, undef, undef, 2>, which would imply a start index of 2 - 3 = -1; no NEON intrinsic is expected.
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+@g = external global <4 x float>
+
+; Negative test: the mask <0, 3, 7, undef> below is not an interleave mask, so no st2 call may be formed and the shufflevector and store must remain.
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.aarch64.neon.st2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 3, i32 7, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
--- a/test/Transforms/InterleavedAccess/AArch64/lit.local.cfg
+++ b/test/Transforms/InterleavedAccess/AArch64/lit.local.cfg
@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+  config.unsupported = True  # Flag as unsupported when 'AArch64' is not among the configured targets.
--- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user.ll
@ -0,0 +1,113 @@
+; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "arm---eabi"
+
+define void @extract_user_basic(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; CHECK-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP0]], i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    ret void
+; The extractelement of index 2 from the wide load (in if.then) is expected to become index 1 of field 0 of the vld2 result.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_multi(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; CHECK-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP0]], i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.merge
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT:    br label %if.merge
+; CHECK:       if.merge:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i64 1
+; CHECK-NEXT:    ret void
+; Two extractelement users (indices 0 and 2) in different blocks are both expected to be rewritten onto field 0 of the vld2 result.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br label %if.merge
+
+if.merge:
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  ret void
+}
+
+define void @extract_user_multi_no_dom(<8 x i32>* %ptr, i1 %c) {
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+; Negative test: the shufflevector lives in if.then and does not dominate the extractelement in entry; no NEON intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 0
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e1 = extractelement <8 x i32> %interleaved.vec, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+define void @extract_user_wrong_const_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+; Negative test: extract index 1 is not among the elements selected by the <0, 2, 4, 6> shuffle; no NEON intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 1
+  ret void
+}
+
+define void @extract_user_undef_index(<8 x i32>* %ptr) {
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+; Negative test: the wide load also has an extractelement user with an undef index; no NEON intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 undef
+  ret void
+}
+
+define void @extract_user_var_index(<8 x i32>* %ptr, i32 %i) {
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT:     @llvm.arm.neon
+; CHECK:         ret void
+; Negative test: the wide load also has an extractelement user with a variable index (%i); no NEON intrinsic is expected.
+entry:
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 8
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %e0 = extractelement <8 x i32> %interleaved.vec, i32 %i
+  ret void
+}
--- a/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
+++ b/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
@ -0,0 +1,646 @@
+; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s -check-prefix=NEON
+; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "arm---eabi"
+
+define void @load_factor2(<16 x i8>* %ptr) {
+; NEON-LABEL:    @load_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-2 de-interleaving load of <16 x i8> (even and odd elements): expects a vld2 returning two <8 x i8> values.
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
+
+define void @load_factor3(<6 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-3 de-interleaving load of <6 x i32>: expects a vld3 returning three <2 x i32> values.
+  %interleaved.vec = load <6 x i32>, <6 x i32>* %ptr, align 4
+  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-4 de-interleaving load of <16 x i32>: expects a vld4 returning four <4 x i32> values.
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret void
+}
+
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+; NEON-LABEL:    @store_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i8>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-2 interleaving store of %v0 and %v1: expects the two <8 x i8> halves to be re-extracted and passed to vst2.
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_factor3(
+; NEON:            [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-3 interleaving store; %v2 is widened with undef elements into %s1. Expects vst3 of three <4 x i32> sub-vectors of %s0/%s1.
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @store_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_factor4(
+; NEON:            [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-4 interleaving store of %v0..%v3 (concatenated pairwise into %s0/%s1): expects vst4 of four <4 x i32> sub-vectors.
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-2 load of a pointer vector where only lane 0 is used: expects vld2 on <2 x i32> plus an inttoptr back to <2 x i32*>.
+  %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
+  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  ret void
+}
+
+define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <6 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-3 load of a pointer vector: expects vld3 on <2 x i32> with each extracted field converted back via inttoptr.
+  %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
+  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
+; NEON-LABEL:    @load_ptrvec_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
+; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
+; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       [[TMP9:%.*]] = inttoptr <2 x i32> [[TMP8]] to <2 x i32*>
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-4 load of a pointer vector: expects vld4 on <2 x i32> with each extracted field converted back via inttoptr.
+  %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
+  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 2, i32 6>
+  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  ret void
+}
+
+define void @store_ptrvec_factor2(<4 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+; NEON-LABEL:    @store_ptrvec_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = ptrtoint <2 x i32*> %v0 to <2 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <2 x i32*> %v1 to <2 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <4 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-2 store of pointer vectors: expects the shuffle operands to be converted with ptrtoint before the vst2 on <2 x i32>.
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+; NEON-LABEL:    @store_ptrvec_factor3(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <6 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-3 store of pointer vectors: expects %s0/%s1 to be converted with ptrtoint and three <2 x i32> sub-vectors passed to vst3.
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @store_ptrvec_factor4(<8 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+; NEON-LABEL:    @store_ptrvec_factor4(
+; NEON:            [[TMP1:%.*]] = ptrtoint <4 x i32*> %s0 to <4 x i32>
+; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x i32*> %s1 to <4 x i32>
+; NEON-NEXT:       [[TMP3:%.*]] = bitcast <8 x i32*>* %ptr to i8*
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 6, i32 7>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_ptrvec_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Factor-4 store of pointer vectors: expects %s0/%s1 to be converted with ptrtoint and four <2 x i32> sub-vectors passed to vst4.
+  %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %ptr, align 4
+  ret void
+}
+
+define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; Both de-interleave masks contain undef elements (<undef, 2, undef, 6> and <undef, 3, undef, 7>); vld2 is still expected.
+  %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  ret void
+}
+
+define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+; The third de-interleave mask is <2, undef, undef, undef>; vld3 is still expected, with field 2 extracted for it.
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  ret void
+}
+
+; Factor-4 de-interleaving load where every shuffle mask has its last two
+; elements undef (e.g. <0, 4, undef, undef>). The NEON checks expect one vld4
+; call and four extractvalues; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
+; NEON-LABEL:    @load_undef_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  ret void
+}
+
+; Factor-2 interleaving store whose mask has its first four elements undef
+; (<undef x 4, 2, 6, 3, 7>). The NEON checks expect a vst2 of the slices
+; <0..3> and <4..7> of %v0/%v1; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_undef_mask_factor2(<8 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; NEON-LABEL:    @store_undef_mask_factor2(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; Factor-3 interleaving store built from the widened operands %s0/%s1, with
+; undef at positions 2 and 4 of the interleave mask. The NEON checks expect a
+; vst3 of the consecutive slices <0..3>, <4..7> and <8..11> of %s0/%s1; the
+; NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; NEON-LABEL:    @store_undef_mask_factor3(
+; NEON:            [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Factor-4 interleaving store built from the widened operands %s0/%s1, with
+; undef at positions 3 and 4 of the interleave mask. The NEON checks expect a
+; vst4 of the consecutive slices <0..3>, <4..7>, <8..11> and <12..15> of
+; %s0/%s1; the NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; NEON-LABEL:    @store_undef_mask_factor4(
+; NEON:            [[TMP1:%.*]] = bitcast <16 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_undef_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
+  ret void
+}
+
+; Factor-3 de-interleaving load through a pointer in addrspace(1). The NEON
+; checks expect the bitcast to keep the address space (i8 addrspace(1)*) and
+; the vld3 intrinsic name to carry the p1i8 pointer suffix; the NO_NEON checks
+; expect no @llvm.arm.neon intrinsic.
+define void @load_address_space(<4 x i32> addrspace(1)* %ptr) {
+; NEON-LABEL:    @load_address_space(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
+; NEON-NEXT:       [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1i8(i8 addrspace(1)* [[TMP1]], i32 0)
+; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
+; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
+; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @load_address_space(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <4 x i32>, <4 x i32> addrspace(1)* %ptr
+  %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 2, i32 5>
+  ret void
+}
+
+; Factor-2 interleaving store through a pointer in addrspace(1). The NEON
+; checks expect the bitcast to keep the address space (i8 addrspace(1)*) and
+; the vst2 intrinsic name to carry the p1i8 pointer suffix; the NO_NEON checks
+; expect no @llvm.arm.neon intrinsic.
+define void @store_address_space(<4 x i32> addrspace(1)* %ptr, <2 x i32> %v0, <2 x i32> %v1) {
+; NEON-LABEL:    @store_address_space(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 3>
+; NEON-NEXT:       call void @llvm.arm.neon.vst2.p1i8.v2i32(i8 addrspace(1)* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], i32 0)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_address_space(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32> %interleaved.vec, <4 x i32> addrspace(1)* %ptr
+  ret void
+}
+
+; Negative test: a <3 x float> load followed by a <0, 2, undef> shuffle. Both
+; the NEON and the NO_NEON checks expect that no @llvm.arm.neon intrinsic is
+; emitted before the return.
+define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
+; NEON-LABEL:    @load_illegal_factor2(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @load_illegal_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
+  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret void
+}
+
+; Negative test: a <0, 2, undef> shuffle of a <3 x float> value that is then
+; stored. Both the NEON and the NO_NEON checks expect that no @llvm.arm.neon
+; intrinsic is emitted before the return.
+define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind {
+; NEON-LABEL:    @store_illegal_factor2(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_illegal_factor2(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 4
+  ret void
+}
+
+; Factor-4 interleaving store whose mask selects four 2-element runs starting
+; at arbitrary offsets (4, 16, 32 and 8) of the two <32 x i32> operands. The
+; NEON checks expect a vst4 of the slices <4,5>, <16,17>, <32,33> and <8,9>;
+; the NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor4(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; Same general factor-4 mask as @store_general_mask_factor4 but with the first
+; mask element undef. The NEON checks expect the same vst4 operands, i.e. the
+; first slice is still <4,5>; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor4_undefbeg(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefbeg(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; Same general factor-4 mask as @store_general_mask_factor4 but with the last
+; mask element undef. The NEON checks expect the same vst4 operands, i.e. the
+; last slice is still <8,9>; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor4_undefend(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefend(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; Same general factor-4 mask as @store_general_mask_factor4 but with mask
+; positions 1 and 6 undef. The NEON checks expect the same vst4 operands
+; (<4,5>, <16,17>, <32,33>, <8,9>); the NO_NEON checks expect no
+; @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor4_undefmid(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmid(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; General factor-4 mask where only elements 0, 3 and 7 are defined (4, 8, 9),
+; so the second and third interleaved lanes are entirely undef. The NEON
+; checks expect a vst4 whose fully-undef lanes are fed by the slice <0,1>,
+; alongside <4,5> and <8,9>; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor4_undefmulti(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor4_undefmulti(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <8 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
+; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
+; NEON-NEXT:       call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
+  ret void
+}
+
+; Factor-3 interleaving store whose mask selects three 4-element runs starting
+; at arbitrary offsets (4, 32 and 16) of the two <32 x i32> operands. The NEON
+; checks expect a vst3 of the slices <4..7>, <32..35> and <16..19>; the
+; NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor3(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; General factor-3 mask where the first interleaved lane is <4, undef, undef,
+; 7> (mask positions 3 and 6 undef). The defined ends are consistent with a
+; run starting at 4, and the NEON checks expect a vst3 of <4..7>, <32..35> and
+; <16..19>; the NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor3_undefmultimid(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undefmultimid(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Negative counterpart of @store_general_mask_factor3_undefmultimid: the first
+; interleaved lane is <4, undef, undef, 8>, and 8 is not 4 + 3, so the defined
+; elements do not describe one consecutive run. Both the NEON and the NO_NEON
+; checks expect no @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor3_undef_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undef_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; General factor-3 mask where the whole first interleaved lane is undef (mask
+; positions 0, 3, 6 and 9). The NEON checks expect a vst3 whose first operand
+; is the slice <0..3>, with <32..35> and <16..19> for the defined lanes; the
+; NO_NEON checks expect no @llvm.arm.neon intrinsic.
+define void @store_general_mask_factor3_undeflane(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_undeflane(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Negative test: the first interleaved lane is <undef, undef, undef, 2>, so
+; only its last element is defined. A 4-element run ending at 2 would have to
+; start at 2 - 3 = -1 (presumably why it is rejected -- confirm against the
+; pass). Both the NEON and the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor3_endstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_endstart_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_endstart_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Positive counterpart of @store_general_mask_factor3_endstart_fail: the first
+; interleaved lane is <undef, undef, undef, 7>, so the run start is implied by
+; the last element (7 - 3 = 4). The NEON checks expect a vst3 of <4..7>,
+; <32..35> and <16..19>; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor3_endstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_endstart_pass(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_endstart_pass(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Negative test: the first interleaved lane is <undef, 0, undef, undef>, so
+; only its second element is defined. A run with 0 in position 1 would have to
+; start at 0 - 1 = -1 (presumably why it is rejected -- confirm against the
+; pass). Both the NEON and the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor3_midstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_midstart_fail(
+; NEON-NOT:        @llvm.arm.neon
+; NEON:            ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_midstart_fail(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; Positive counterpart of @store_general_mask_factor3_midstart_fail: the first
+; interleaved lane is <undef, 1, undef, undef>, so the run start is implied by
+; the second element (1 - 1 = 0). The NEON checks expect a vst3 of <0..3>,
+; <32..35> and <16..19>; the NO_NEON checks expect no @llvm.arm.neon
+; intrinsic.
+define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
+; NEON-LABEL:    @store_general_mask_factor3_midstart_pass(
+; NEON-NEXT:       [[TMP1:%.*]] = bitcast <12 x i32>* %ptr to i8*
+; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; NEON-NEXT:       call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
+; NEON-NEXT:       ret void
+; NO_NEON-LABEL: @store_general_mask_factor3_midstart_pass(
+; NO_NEON-NOT:     @llvm.arm.neon
+; NO_NEON:         ret void
+;
+  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
+  ret void
+}
+
+; External global that @no_interleave stores its shuffled vector to.
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; The mask <0, 7, 1, undef> resembles a factor-2 interleave of runs starting
+; at elements 0 and 7, but a 2-element run starting at 7 would need element 8,
+; which is past the 8 elements of the two <4 x float> operands (presumably the
+; reason no vst2 may be formed -- confirm against the pass). Both run
+; configurations expect the shufflevector and the plain store to remain.
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.arm.neon.vst2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
+  store <4 x float> %v0, <4 x float>* @g, align 16
+  ret void
+}
--- a/test/Transforms/InterleavedAccess/ARM/lit.local.cfg
+++ b/test/Transforms/InterleavedAccess/ARM/lit.local.cfg
@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+  config.unsupported = True
--- a/test/tools/llvm-symbolizer/coff-exports.test
+++ b/test/tools/llvm-symbolizer/coff-exports.test
@ -2,8 +2,10 @@ RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
 RUN: 	 | llvm-symbolizer --inlining --relative-address -obj="%p/Inputs/coff-exports.exe" \
 RUN:	 | FileCheck %s

-This test relies on UnDecorateSymbolName, which is Windows-only.
+This test relies on UnDecorateSymbolName, which is Win32-only.
 REQUIRES: system-windows
+REQUIRES: target-windows
+FIXME: This test depends on host, not target.

 ADDR: 0x500A
 ADDR: 0x5038