Vendor import of llvm release_80 branch r353167:

https://llvm.org/svn/llvm-project/llvm/branches/release_80@353167
svn path=/vendor/llvm/dist-release_80/; revision=343794 svn path=/vendor/llvm/llvm-release_80-r353167/; revision=343795; tag=vendor/llvm/llvm-release_80-r353167
2019-02-05 18:38:58 +00:00 · 2019-02-05 18:38:58 +00:00 · e79719ce60 · 2020-12-20 02:59:44 +00:00
commit e79719ce60
parent 3edec5c15a
61 changed files with 1535 additions and 263 deletions
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@ -1280,7 +1280,6 @@ function(get_llvm_lit_path base_dir file_name)
  cmake_parse_arguments(ARG "ALLOW_EXTERNAL" "" "" ${ARGN})

  if (ARG_ALLOW_EXTERNAL)
-    set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_EXTERNAL_LIT}")
    set (LLVM_EXTERNAL_LIT "" CACHE STRING "Command used to spawn lit")
    if ("${LLVM_EXTERNAL_LIT}" STREQUAL "")
      set(LLVM_EXTERNAL_LIT "${LLVM_DEFAULT_EXTERNAL_LIT}")
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@ -48,6 +48,12 @@ Non-comprehensive list of changes in this release
  functionality.  See `Writing an LLVM Pass
  <WritingAnLLVMPass.html#setting-up-the-build-environment>`_.

+* For MinGW, references to data variables that might need to be imported
+  from a dll are accessed via a stub, to allow the linker to convert it to
+  a dllimport if needed.
+
+* Added support for labels as offsets in ``.reloc`` directive.
+
 .. NOTE
   If you would like to document a larger change, then you can add a
   subsection about it right here. You can copy the following boilerplate
@ -62,17 +68,44 @@ Changes to the LLVM IR
 ----------------------


+Changes to the AArch64 Target
+-----------------------------
+
+* Added support for the ``.arch_extension`` assembler directive, just like
+  on ARM.
+
+
 Changes to the ARM Backend
 --------------------------

 During this release ...


+Changes to the Hexagon Target
+-----------------------------
+
+* Added support for Hexagon/HVX V66 ISA.
+
 Changes to the MIPS Target
 --------------------------

- During this release ...
+* Improved support of GlobalISel instruction selection framework.

+* Implemented emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR``
+  relocations. These relocations provide hints to a linker for optimization
+  of jumps to protected symbols.
+
+* ORC JIT is now supported for the MIPS and MIPS64 architectures.
+
+* Assembler now suggests alternative MIPS instruction mnemonics when
+  an invalid one is specified.
+
+* Improved support for MIPS N32 ABI.
+
+* Added new instructions (``pll.ps``, ``plu.ps``, ``cvt.s.pu``,
+  ``cvt.s.pl``, ``cvt.ps``, ``sigrie``).
+
+* Numerous bug fixes and code cleanups.

 Changes to the PowerPC Target
 -----------------------------
@ -123,7 +156,16 @@ Changes to the DAG infrastructure
 External Open Source Projects Using LLVM 8
 ==========================================

-* A project...
+Zig Programming Language
+------------------------
+
+`Zig <https://ziglang.org>`_  is a system programming language intended to be
+an alternative to C. It provides high level features such as generics, compile
+time function execution, and partial evaluation, while exposing low level LLVM
+IR features such as aliases and intrinsics. Zig uses Clang to provide automatic
+import of .h symbols, including inline functions and simple macros. Zig uses
+LLD combined with lazily building compiler-rt to provide out-of-the-box
+cross-compiling for all supported targets.


 Additional Information
--- a/include/llvm/Support/JSON.h
+++ b/include/llvm/Support/JSON.h
@ -481,6 +481,7 @@ class Value {
  mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
                                      std::string, json::Array, json::Object>
      Union;
+  friend bool operator==(const Value &, const Value &);
 };

 bool operator==(const Value &, const Value &);
--- a/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h
@ -44,6 +44,11 @@ class FunctionImportGlobalProcessing {
  /// to promote any non-renamable values.
  SmallPtrSet<GlobalValue *, 8> Used;

+  /// Keep track of any COMDATs that require renaming (because COMDAT
+  /// leader was promoted and renamed). Maps from original COMDAT to one
+  /// with new name.
+  DenseMap<const Comdat *, Comdat *> RenamedComdats;
+
  /// Check if we should promote the given local value to global scope.
  bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);

--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@ -1836,7 +1836,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,

  unsigned Index = 0;
  SmallVector<TypeIndex, 8> ArgTypeIndices;
-  TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  TypeIndex ReturnTypeIndex = TypeIndex::Void();
+  if (ReturnAndArgs.size() > Index) {
+    ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  }

  // If the first argument is a pointer type and this isn't a static method,
  // treat it as the special 'this' parameter, which is encoded separately from
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@ -1956,8 +1956,10 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
 void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
  // Emit the size.
  Asm->OutStreamer->AddComment("Loc expr size");
-  Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+  if (getDwarfVersion() >= 5)
+    Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+  else
+    Asm->emitInt16(DebugLocs.getBytes(Entry).size());
  // Emit the entry.
  APByteStreamer Streamer(*Asm);
  emitDebugLocEntry(Streamer, Entry);
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
  }

 #ifndef NDEBUG
-  bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+  bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+                   Op.getType() == MachineOperand::MO_MCSymbol;
  // OpNo now points as the desired insertion point.  Unless this is a variadic
  // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
  // RegMask operands go between the explicit and implicit operands.
  assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
-          OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+          OpNo < MCID->getNumOperands() || isDebugOp) &&
         "Trying to add an operand to a machine instr that is already done!");
 #endif

--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+  EVT VT = BV->getValueType(0);
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // Create a shuffle mask that will combine the extracted element with zeros
  // and undefs.
-  int ZextRatio =  DestSize / SrcSize;
+  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                      ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }

 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@ -184,7 +184,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
    }

    if (Kind != dwarf::DW_LLE_base_address) {
-      unsigned Bytes = Data.getU16(Offset);
+      unsigned Bytes =
+          Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
      // A single location description describing the location of the object...
      StringRef str = Data.getData().substr(*Offset, Bytes);
      *Offset += Bytes;
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@ -469,6 +469,11 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
    }
  }

+  if (Name == "seh.recoverfp") {
+    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+    return true;
+  }
+
  return false;
 }

@ -544,10 +549,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
-    if (Name == "x86.seh.recoverfp") {
-      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
-      return true;
-    }
    break;
  }

--- a/lib/Support/JSON.cpp
+++ b/lib/Support/JSON.cpp
@ -182,6 +182,12 @@ bool operator==(const Value &L, const Value &R) {
  case Value::Boolean:
    return *L.getAsBoolean() == *R.getAsBoolean();
  case Value::Number:
+    // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+    // The same integer must convert to the same double, per the standard.
+    // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
+    // So we avoid floating point promotion for exact comparisons.
+    if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
+      return L.getAsInteger() == R.getAsInteger();
    return *L.getAsNumber() == *R.getAsNumber();
  case Value::String:
    return *L.getAsString() == *R.getAsString();
--- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@ -103,6 +103,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
@ -146,25 +147,31 @@ class AArch64SpeculationHardening : public MachineFunctionPass {
  BitVector RegsAlreadyMasked;

  bool functionUsesHardeningRegister(MachineFunction &MF) const;
-  bool instrumentControlFlow(MachineBasicBlock &MBB);
+  bool instrumentControlFlow(MachineBasicBlock &MBB,
+                             bool &UsesFullSpeculationBarrier);
  bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                               MachineBasicBlock *&FBB,
                               AArch64CC::CondCode &CondCode) const;
  void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                          AArch64CC::CondCode &CondCode, DebugLoc DL) const;
-  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) const;
-  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned TmpReg) const;
+  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    DebugLoc DL) const;

  bool slhLoads(MachineBasicBlock &MBB);
  bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              MachineInstr &MI, unsigned Reg);
-  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
+                                        bool UsesFullSpeculationBarrier);
  bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI);
+                                  MachineBasicBlock::iterator MBBI,
+                                  bool UsesFullSpeculationBarrier);
  bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  DebugLoc DL);
 };
@ -207,15 +214,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow(
  return true;
 }

+void AArch64SpeculationHardening::insertFullSpeculationBarrier(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    DebugLoc DL) const {
+  // A full control flow speculation barrier consists of (DSB SYS + ISB)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
+}
+
 void AArch64SpeculationHardening::insertTrackingCode(
    MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
    DebugLoc DL) const {
  if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
-        .addImm(0xf);
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
-        .addImm(0xf);
+    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
  } else {
    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
        .addDef(MisspeculatingTaintReg)
@ -227,7 +238,7 @@ void AArch64SpeculationHardening::insertTrackingCode(
 }

 bool AArch64SpeculationHardening::instrumentControlFlow(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
  LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);

  bool Modified = false;
@ -263,55 +274,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
  }

  // Perform correct code generation around function calls and before returns.
-  {
-    SmallVector<MachineInstr *, 4> ReturnInstructions;
-    SmallVector<MachineInstr *, 4> CallInstructions;
+  // The below variables record the return/terminator instructions and the call
+  // instructions respectively; including which register is available as a
+  // temporary register just before the recorded instructions.
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
+  // if a temporary register is not available for at least one of the
+  // instructions for which we need to transfer taint to the stack pointer, we
+  // need to insert a full speculation barrier.
+  // TmpRegisterNotAvailableEverywhere tracks that condition.
+  bool TmpRegisterNotAvailableEverywhere = false;

-    for (MachineInstr &MI : MBB) {
-      if (MI.isReturn())
-        ReturnInstructions.push_back(&MI);
-      else if (MI.isCall())
-        CallInstructions.push_back(&MI);
-    }
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);

-    Modified |=
-        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+    MachineInstr &MI = *I;
+    if (!MI.isReturn() && !MI.isCall())
+      continue;

-    for (MachineInstr *Return : ReturnInstructions)
-      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
-    for (MachineInstr *Call : CallInstructions) {
-      // Just after the call:
-      MachineBasicBlock::iterator i = Call;
-      i++;
-      insertSPToRegTaintPropagation(Call->getParent(), i);
-      // Just before the call:
-      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
-    }
+    // The RegScavenger represents registers available *after* the MI
+    // instruction pointed to by RS.getCurrentPosition().
+    // We need to have a register that is available *before* the MI is executed.
+    if (I != MBB.begin())
+      RS.forward(std::prev(I));
+    // FIXME: The below just finds *an* unused register. Maybe code could be
+    // optimized more if this looks for the register that isn't used for the
+    // longest time around this place, to enable more scheduling freedom. Not
+    // sure if that would actually result in a big performance difference
+    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
+    // already to do this - but it's unclear if that could easily be used here.
+    unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+    LLVM_DEBUG(dbgs() << "RS finds "
+                      << ((TmpReg == 0) ? "no register " : "register ");
+               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
+               dbgs() << "to be available at MI " << MI);
+    if (TmpReg == 0)
+      TmpRegisterNotAvailableEverywhere = true;
+    if (MI.isReturn())
+      ReturnInstructions.push_back({&MI, TmpReg});
+    else if (MI.isCall())
+      CallInstructions.push_back({&MI, TmpReg});
  }

+  if (TmpRegisterNotAvailableEverywhere) {
+    // When a temporary register is not available everywhere in this basic
+    // block where a propagate-taint-to-sp operation is needed, just
+    // emit a full speculation barrier at the start of this basic block, which
+    // renders the taint/speculation tracking in this basic block unnecessary.
+    insertFullSpeculationBarrier(MBB, MBB.begin(),
+                                 (MBB.begin())->getDebugLoc());
+    UsesFullSpeculationBarrier = true;
+    Modified = true;
+  } else {
+    for (auto MI_Reg : ReturnInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(
+          dbgs()
+          << " About to insert Reg to SP taint propagation with temp register "
+          << printReg(MI_Reg.second, TRI)
+          << " on instruction: " << *MI_Reg.first);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
+    }
+
+    for (auto MI_Reg : CallInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
+                           "propagation with temp register "
+                        << printReg(MI_Reg.second, TRI)
+                        << " around instruction: " << *MI_Reg.first);
+      // Just after the call:
+      insertSPToRegTaintPropagation(
+          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
+      // Just before the call:
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
+    }
+  }
  return Modified;
 }

 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  // If full control flow speculation barriers are used, emit a control flow
  // barrier to block potential miss-speculation in flight coming in to this
  // function.
  if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
    return;
  }

  // CMP   SP, #0   === SUBS   xzr, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
      .addDef(AArch64::XZR)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
      .addDef(MisspeculatingTaintReg)
      .addUse(AArch64::XZR)
      .addUse(AArch64::XZR)
@ -319,7 +380,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
 }

 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    unsigned TmpReg) const {
  // If full control flow speculation barriers are used, there will not be
  // miss-speculation when returning from this function, and therefore, also
@ -328,19 +389,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
    return;

  // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(TmpReg)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
      .addDef(TmpReg, RegState::Renamable)
      .addUse(TmpReg, RegState::Kill | RegState::Renamable)
      .addUse(MisspeculatingTaintReg, RegState::Kill)
      .addImm(0);
  // mov   SP, Xtmp === ADD SP, Xtmp, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(AArch64::SP)
      .addUse(TmpReg, RegState::Kill)
      .addImm(0)
@ -484,7 +545,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
 /// \brief If MBBI references a pseudo instruction that should be expanded
 /// here, do the expansion and return true. Otherwise return false.
 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool UsesFullSpeculationBarrier) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  bool Is64Bit = true;
@ -499,7 +561,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
    // Just remove the SpeculationSafe pseudo's if control flow
    // miss-speculation isn't happening because we're already inserting barriers
    // to guarantee that.
-    if (!UseControlFlowSpeculationBarrier) {
+    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
      unsigned DstReg = MI.getOperand(0).getReg();
      unsigned SrcReg = MI.getOperand(1).getReg();
      // Mark this register and all its aliasing registers as needing to be
@ -537,7 +599,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
 }

 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
  bool Modified = false;

  RegsNeedingCSDBBeforeUse.reset();
@ -572,15 +634,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
          break;
        }

-    if (NeedToEmitBarrier)
+    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
      Modified |= insertCSDB(MBB, MBBI, DL);

-    Modified |= expandSpeculationSafeValue(MBB, MBBI);
+    Modified |=
+        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);

    MBBI = NMBBI;
  }

-  if (RegsNeedingCSDBBeforeUse.any())
+  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
    Modified |= insertCSDB(MBB, MBBI, DL);

  return Modified;
@ -609,7 +672,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
      Modified |= slhLoads(MBB);
  }

-  // 2.a Add instrumentation code to function entry and exits.
+  // 2. Add instrumentation code to function entry and exits.
  LLVM_DEBUG(
      dbgs()
      << "***** AArch64SpeculationHardening - track control flow *****\n");
@ -620,17 +683,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
    EntryBlocks.push_back(LPI.LandingPadBlock);
  for (auto Entry : EntryBlocks)
    insertSPToRegTaintPropagation(
-        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));

-  // 2.b Add instrumentation code to every basic block.
-  for (auto &MBB : MF)
-    Modified |= instrumentControlFlow(MBB);
-
-  LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
-                       "SpeculationSafeValue Pseudos *****\n");
-  // Step 3: Lower SpeculationSafeValue pseudo instructions.
-  for (auto &MBB : MF)
-    Modified |= lowerSpeculationSafeValuePseudos(MBB);
+  // 3. Add instrumentation code to every basic block.
+  for (auto &MBB : MF) {
+    bool UsesFullSpeculationBarrier = false;
+    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
+    Modified |=
+        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
+  }

  return Modified;
 }
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@ -65,10 +65,7 @@ class MCInstrInfo;

 } // end namespace llvm

-static cl::opt<bool>
-EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
-              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
-              cl::init(true));
+extern cl::opt<bool> EmitJalrReloc;

 namespace {

--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@ -15,6 +15,13 @@

 using namespace llvm;

+// Note: this option is defined here to be visible from libLLVMMipsAsmParser
+//       and libLLVMMipsCodeGen
+cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+              cl::init(true));
+
 namespace {
 static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};

--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@ -89,7 +89,10 @@ namespace MipsII {
    MO_GOT_HI16,
    MO_GOT_LO16,
    MO_CALL_HI16,
-    MO_CALL_LO16
+    MO_CALL_LO16,
+
+    /// Helper operand used to generate R_MIPS_JALR
+    MO_JALR
  };

  enum {
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@ -614,8 +614,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
      llvm_unreachable("Unhandled fixup kind!");
      break;
    case MipsMCExpr::MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-      break;
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI);
    case MipsMCExpr::MEK_CALL_HI16:
      FixupKind = Mips::fixup_Mips_CALL_HI16;
      break;
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@ -44,8 +44,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
    llvm_unreachable("MEK_None and MEK_Special are invalid");
    break;
  case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
+    // MEK_DTPREL is used for marking TLS DIEExpr only
+    // and contains a regular sub-expression.
+    getSubExpr()->print(OS, MAI, true);
+    return;
  case MEK_CALL_HI16:
    OS << "%call_hi";
    break;
@ -161,7 +163,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
    case MEK_Special:
      llvm_unreachable("MEK_None and MEK_Special are invalid");
    case MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
    case MEK_DTPREL_HI:
    case MEK_DTPREL_LO:
    case MEK_GOT:
@ -249,9 +253,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
  case MEK_Special:
    llvm_unreachable("MEK_None and MEK_Special are invalid");
    break;
-  case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
  case MEK_CALL_HI16:
  case MEK_CALL_LO16:
  case MEK_GOT:
@ -274,6 +275,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
    if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
      E->fixELFSymbolsInTLSFixups(Asm);
    break;
+  case MEK_DTPREL:
  case MEK_DTPREL_HI:
  case MEK_DTPREL_LO:
  case MEK_TLSLDM:
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
  let isCall = 1;
  let hasDelaySlot = 0;
  let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;

--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@ -426,6 +426,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
  let isCall = 1;
  let hasDelaySlot = 1;
  let Defs = [RA];
+  let hasPostISelHook = 1;
 }

 // 16-bit Jump Reg
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@ -1105,7 +1105,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),

 // Pseudo instructions
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
-    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in {
  class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
    PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
    PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@ -68,6 +68,8 @@ using namespace llvm;

 #define DEBUG_TYPE "mips-asm-printer"

+extern cl::opt<bool> EmitJalrReloc;
+
 MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
  return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
 }
@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
  EmitToStreamer(OutStreamer, TmpInst0);
 }

+// If there is an MO_JALR operand, insert:
+//
+// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+// tmplabel:
+//
+// This is an optimization hint for the linker which may then replace
+// an indirect call with a direct branch.
+static void emitDirectiveRelocJalr(const MachineInstr &MI,
+                                   MCContext &OutContext,
+                                   TargetMachine &TM,
+                                   MCStreamer &OutStreamer,
+                                   const MipsSubtarget &Subtarget) {
+  for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
+       I < E; ++I) {
+    MachineOperand MO = MI.getOperand(I);
+    if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) {
+      MCSymbol *Callee = MO.getMCSymbol();
+      if (Callee && !Callee->getName().empty()) {
+        MCSymbol *OffsetLabel = OutContext.createTempSymbol();
+        const MCExpr *OffsetExpr =
+            MCSymbolRefExpr::create(OffsetLabel, OutContext);
+        const MCExpr *CaleeExpr =
+            MCSymbolRefExpr::create(Callee, OutContext);
+        OutStreamer.EmitRelocDirective
+            (*OffsetExpr,
+             Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+             CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+        OutStreamer.EmitLabel(OffsetLabel);
+        return;
+      }
+    }
+  }
+}
+
 void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MipsTargetStreamer &TS = getTargetStreamer();
  unsigned Opc = MI->getOpcode();
@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    return;
  }

+  if (EmitJalrReloc &&
+      (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) {
+    emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget);
+  }
+
  MachineBasicBlock::const_instr_iterator I = MI->getIterator();
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();

--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@ -56,6 +56,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
@ -75,6 +76,8 @@

 using namespace llvm;

+extern cl::opt<bool> EmitJalrReloc;
+
 namespace {

 class MipsFastISel final : public FastISel {
@ -1551,6 +1554,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {

  CLI.Call = MIB;

+  if (EmitJalrReloc && !Subtarget->inMips16Mode()) {
+    // Attach callee address to the instruction, let asm printer emit
+    // .reloc R_MIPS_JALR.
+    if (Symbol)
+      MIB.addSym(Symbol, MipsII::MO_JALR);
+    else
+      MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol(
+	                   Addr.getGlobalValue()->getName()), MipsII::MO_JALR);
+  }
+
  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
 }
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@ -57,6 +57,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@ -91,6 +92,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
               cl::desc("MIPS: Don't trap on integer division by zero."),
               cl::init(false));

+extern cl::opt<bool> EmitJalrReloc;
+
 static const MCPhysReg Mips64DPRegs[8] = {
  Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
  Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@ -2879,6 +2882,54 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
    Ops.push_back(InFlag);
 }

+// Post-instruction-selection hook for the JALR* and TAILCALL*REG* opcodes
+// listed below. When the callee address set by LowerCall() is a named
+// symbol, it is appended to MI so that the asm printer can emit
+// .reloc R_MIPS_JALR for it.
+void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                       SDNode *Node) const {
+  switch (MI.getOpcode()) {
+  case Mips::JALR:
+  case Mips::JALRPseudo:
+  case Mips::JALR64:
+  case Mips::JALR64Pseudo:
+  case Mips::JALR16_MM:
+  case Mips::JALRC16_MMR6:
+  case Mips::TAILCALLREG:
+  case Mips::TAILCALLREG64:
+  case Mips::TAILCALLR6REG:
+  case Mips::TAILCALL64R6REG:
+  case Mips::TAILCALLREG_MM:
+  case Mips::TAILCALLREG_MMR6:
+    break;
+  default:
+    return;
+  }
+
+  // Nothing to do when the hint is disabled, in MIPS16 mode or for non-PIC.
+  if (!EmitJalrReloc || Subtarget.inMips16Mode() || !isPositionIndependent())
+    return;
+
+  // The callee address is operand 1 of the node's first operand, if present.
+  if (Node->getNumOperands() < 1 || Node->getOperand(0).getNumOperands() < 2)
+    return;
+  const SDValue CalleeAddr = Node->getOperand(0).getOperand(1);
+
+  // Only global addresses and external symbols provide a name.
+  StringRef CalleeName;
+  if (const auto *GA = dyn_cast_or_null<const GlobalAddressSDNode>(CalleeAddr))
+    CalleeName = GA->getGlobal()->getName();
+  else if (const auto *Ext =
+               dyn_cast_or_null<const ExternalSymbolSDNode>(CalleeAddr))
+    CalleeName = Ext->getSymbol();
+  if (CalleeName.empty())
+    return;
+
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MCSymbol *CalleeSym = MF.getContext().getOrCreateSymbol(CalleeName);
+  MI.addOperand(MachineOperand::CreateMCSymbol(CalleeSym, MipsII::MO_JALR));
+}
+
 /// LowerCall - functions arguments are copied from virtual regs to
 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
 SDValue
@ -2930,7 +2981,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
  // the maximum out going argument area (including the reserved area), and
  // preallocates the stack space on entrance to the caller.
  //
-  // FIXME: We should do the same for efficency and space.
+  // FIXME: We should do the same for efficiency and space.

  // Note: The check on the calling convention below must match
  //       MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@ -341,6 +341,9 @@ class TargetRegisterClass;
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

+    void AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                       SDNode *Node) const override;
+
    void HandleByVal(CCState *, unsigned &, unsigned) const override;

    unsigned getRegisterByName(const char* RegName, EVT VT,
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,

    MIB.addImm(0);

+    // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR),
+    // add it to the new instruction.
+    for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands();
+         J < E; ++J) {
+      const MachineOperand &MO = I->getOperand(J);
+      if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR))
+        MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR);
+    }
+
+
  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    {MO_GOT_HI16,     "mips-got-hi16"},
    {MO_GOT_LO16,     "mips-got-lo16"},
    {MO_CALL_HI16,    "mips-call-hi16"},
-    {MO_CALL_LO16,    "mips-call-lo16"}
+    {MO_CALL_LO16,    "mips-call-lo16"},
+    {MO_JALR,         "mips-jalr"}
  };
  return makeArrayRef(Flags);
 }
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@ -1623,11 +1623,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
  class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
                          Register RetReg, RegisterOperand ResRO = RO>:
    PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
-    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
+    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> {
+    let hasPostISelHook = 1;
+  }

  class JumpLinkReg<string opstr, RegisterOperand RO>:
    InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
-           [], II_JALR, FrmR, opstr>;
+           [], II_JALR, FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }

  class BGEZAL_FT<string opstr, DAGOperand opnd,
                  RegisterOperand RO> :
@ -1646,7 +1650,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,

  class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
    PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
-    PseudoInstExpansion<(JumpInst RO:$rs)>;
+    PseudoInstExpansion<(JumpInst RO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 }

 class BAL_BR_Pseudo<Instruction RealInst, DAGOperand opnd> :
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@ -117,6 +117,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
  case MipsII::MO_CALL_LO16:
    TargetKind = MipsMCExpr::MEK_CALL_LO16;
    break;
+  case MipsII::MO_JALR:
+    return MCOperand();
  }

  switch (MOTy) {
--- a/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/lib/Target/X86/X86DiscriminateMemOps.cpp
@ -27,6 +27,14 @@ using namespace llvm;

 #define DEBUG_TYPE "x86-discriminate-memops"

+static cl::opt<bool> EnableDiscriminateMemops(
+    DEBUG_TYPE, cl::init(false), // i.e. -x86-discriminate-memops, default off
+    cl::desc("Generate unique debug info for each instruction with a memory "
+             "operand. Should be enabled for profile-driven cache prefetching, "
+             "both in the build of the binary being profiled, as well as in "
+             "the build of the binary consuming the profile."),
+    cl::Hidden);
+
 namespace {

 using Location = std::pair<StringRef, unsigned>;
@ -67,6 +75,9 @@ char X86DiscriminateMemOps::ID = 0;
 X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}

 bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableDiscriminateMemops)
+    return false;
+
  DISubprogram *FDI = MF.getFunction().getSubprogram();
  if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
    return false;
--- a/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/lib/Target/X86/X86InsertPrefetch.cpp
@ -34,7 +34,8 @@ using namespace sampleprof;

 static cl::opt<std::string>
    PrefetchHintsFile("prefetch-hints-file",
-                      cl::desc("Path to the prefetch hints profile."),
+                      cl::desc("Path to the prefetch hints profile. See also "
+                               "-x86-discriminate-memops"),
                      cl::Hidden);
 namespace {

--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@ -249,6 +249,8 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
  bool DoPromote = false;
  if (GV.hasLocalLinkage() &&
      ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+    // Save the original name string before we rename GV below.
+    auto Name = GV.getName().str();
    // Once we change the name or linkage it is difficult to determine
    // again whether we should promote since shouldPromoteLocalToGlobal needs
    // to locate the summary (based on GUID from name and linkage). Therefore,
@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
    GV.setLinkage(getLinkage(&GV, DoPromote));
    if (!GV.hasLocalLinkage())
      GV.setVisibility(GlobalValue::HiddenVisibility);
+
+    // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+    // for later renaming as well. This is required for COFF.
+    if (const auto *C = GV.getComdat())
+      if (C->getName() == Name)
+        RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
  } else
    GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));

@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
    processGlobalForThinLTO(SF);
  for (GlobalAlias &GA : M.aliases())
    processGlobalForThinLTO(GA);
+
+  // Replace any COMDATS that required renaming (because the COMDAT leader was
+  // promoted and renamed).
+  if (!RenamedComdats.empty())
+    for (auto &GO : M.global_objects())
+      if (auto *C = GO.getComdat()) {
+        auto Replacement = RenamedComdats.find(C);
+        if (Replacement != RenamedComdats.end())
+          GO.setComdat(Replacement->second);
+      }
 }

 bool FunctionImportGlobalProcessing::run() {
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@ -217,7 +217,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
    // When the value is absent it is interpreted as 'attribute set'.
    return true;
  case 2:
-    return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+    if (ConstantInt *IntMD =
+            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+      return IntMD->getZExtValue();
+    return true;
  }
  llvm_unreachable("unexpected number of options");
 }
@ -376,17 +379,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
  Optional<int> InterleaveCount =
      getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");

-  if (Enable == true) {
-    // 'Forcing' vector width and interleave count to one effectively disables
-    // this tranformation.
-    if (VectorizeWidth == 1 && InterleaveCount == 1)
-      return TM_SuppressedByUser;
-    return TM_ForcedByUser;
-  }
+  // 'Forcing' vector width and interleave count to one effectively disables
+  // this transformation.
+  if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+    return TM_SuppressedByUser;

  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return TM_Disable;

+  if (Enable == true)
+    return TM_ForcedByUser;
+
  if (VectorizeWidth == 1 && InterleaveCount == 1)
    return TM_Disable;

--- a/test/CodeGen/AArch64/build-vector-extract.ll
+++ b/test/CodeGen/AArch64/build-vector-extract.ll
@ -0,0 +1,441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+; This would crash because we did not expect to create
+; a shuffle for a vector where the source operand is
+; not the same size as the result.
+; TODO: Should we handle this pattern? I.e., is moving to/from
+; registers the optimal code?
+
+define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
+; CHECK-LABEL: larger_bv_than_source:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %t1 = extractelement <4 x i16> %t0, i32 2
+  %vgetq_lane = zext i16 %t1 to i32
+  %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
+  ret <4 x i32> %t2
+}
+
--- a/test/CodeGen/AArch64/eh_recoverfp.ll
+++ b/test/CodeGen/AArch64/eh_recoverfp.ll
@ -0,0 +1,11 @@
+; RUN: llc -mtriple arm64-windows %s -o - 2>&1 | FileCheck %s
+
+define i8* @foo(i8* %a) {
+; CHECK-LABEL: foo
+; CHECK-NOT: llvm.x86.seh.recoverfp
+  %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @f to i8*), i8* %a)
+  ret i8* %1
+}
+
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+declare i32 @f()
--- a/test/CodeGen/AArch64/speculation-hardening-loads.ll
+++ b/test/CodeGen/AArch64/speculation-hardening-loads.ll
@ -11,10 +11,10 @@ entry:
 ; CHECK-NEXT: and   x8, x8, x16
 ; CHECK-NEXT: and   x1, x1, x16
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
 ; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }

@ -29,9 +29,9 @@ entry:
 ; CHECK-NEXT: and   x0, x0, x16
 ; CHECK-NEXT: csdb
 ; CHECK-NEXT: ldr   d0, [x0]
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }

@ -51,12 +51,12 @@ entry:
 ; CHECK-NEXT: and x8, x8, x16
 ; csdb instruction must occur before the add instruction with w8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add w9, w1, w8
 ; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel w0, w1, w9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }

@ -76,12 +76,12 @@ entry:
 ; CHECK-NEXT: and w8, w8, w16
 ; csdb instruction must occur before the add instruction with x8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add x9, x1, x8
 ; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel x0, x1, x9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }

@ -112,11 +112,11 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ldr     d0, [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
 ; CHECK-NEXT:  mov     v0.d[1], v0.d[0]
 ; CHECK-NEXT:  str     q0, [x0]
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
 }

@ -129,9 +129,9 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ld1     { v0.d }[0], [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
  %0 = load double, double* %b, align 16
  %vld1_lane = insertelement <2 x double> <double undef, double 0.000000e+00>, double %0, i32 0
@ -147,9 +147,9 @@ entry:
 ; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT:  ldr     w8, [sp, #12]
 ; CHECK-NEXT:  add     sp, sp, #16
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
  %a = alloca i32, align 4
  %val = load volatile i32, i32* %a, align 4
--- a/test/CodeGen/AArch64/speculation-hardening.ll
+++ b/test/CodeGen/AArch64/speculation-hardening.ll
@ -1,9 +1,9 @@
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
+; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure

 define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: f
@ -13,12 +13,12 @@ entry:
 ; NOSLH-NOT:  cmp sp, #0
 ; NOSLH-NOT:  csetm x16, ne

-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
  %call = tail call i32 @tail_callee(i32 %i)
 ; SLH:  cmp sp, #0
 ; SLH:  csetm x16, ne
@ -43,29 +43,37 @@ if.then:                                          ; preds = %entry
 ; NOSLH-NOT: csel x16, x16, xzr, [[COND]]
 return:                                           ; preds = %entry, %if.then
  %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ]
-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
  ret i32 %retval.0
 }

 ; Make sure that for a tail call, taint doesn't get put into SP twice.
 define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: tail_caller:
-; SLH:     mov     x17, sp
-; SLH:     and     x17, x17, x16
-; SLH:     mov     sp, x17
-; NOSLH-NOT:     mov     x17, sp
-; NOSLH-NOT:     and     x17, x17, x16
-; NOSLH-NOT:     mov     sp, x17
+; NOGISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELSLH:     mov sp, [[TMPREG]]
+; NOGISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELNOSLH-NOT:     mov sp, [[TMPREG]]
+; GISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; GISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; GISELSLH:     mov sp, [[TMPREG]]
+; GISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; GISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; GISELNOSLH-NOT:     mov sp, [[TMPREG]]
 ;  GlobalISel doesn't optimize tail calls (yet?), so only check that
 ;  cross-call taint register setup code is missing if a tail call was
 ;  actually produced.
-; SLH:     {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}}
-; SLH-NOT: cmp sp, #0
+; NOGISELSLH:     b tail_callee
+; GISELSLH:       bl tail_callee
+; GISELSLH:       cmp sp, #0
+; SLH-NOT:        cmp sp, #0
  %call = tail call i32 @tail_callee(i32 %a)
  ret i32 %call
 }
--- a/test/CodeGen/AArch64/speculation-hardening.mir
+++ b/test/CodeGen/AArch64/speculation-hardening.mir
@ -25,6 +25,22 @@
  define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
   ret void
  }
+  ; Also check that a non-default temporary register gets picked correctly to
+  ; copy the SP into and AND it with the taint register when the default
+  ; temporary isn't available.
+  define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8
+  define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @RS_cannot_find_available_regs() speculative_load_hardening {
+   ret void
+  }
 ...
 ---
 name:            nobranch_fallthrough
@ -115,3 +131,72 @@ body:             |
  ; CHECK-NOT: csel
   RET undef $lr, implicit $x0
 ...
+---
+name:            indirect_call_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x17
+    ; CHECK-LABEL: indirect_call_x17
+    ; CHECK:       mov x0, sp
+    ; CHECK:       and x0, x0, x16
+    ; CHECK:       mov sp, x0
+    ; CHECK:       blr x17
+    BLR killed renamable $x17, implicit-def dead $lr, implicit $sp
+    RET undef $lr, implicit undef $w0
+...
+---
+name:           indirect_tailcall_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: indirect_tailcall_x17
+    ; CHECK:       mov x1, sp
+    ; CHECK:       and x1, x1, x16
+    ; CHECK:       mov sp, x1
+    ; CHECK:       br x17
+    $x8 = ADRP target-flags(aarch64-page) @g
+    $x17 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @g
+    TCRETURNri killed $x17, 0, implicit $sp, implicit $x0
+...
+---
+name:           indirect_call_lr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: indirect_call_lr
+    ; CHECK:            mov x1, sp
+    ; CHECK-NEXT:       and x1, x1, x16
+    ; CHECK-NEXT:       mov sp, x1
+    ; CHECK-NEXT:       blr x30
+    liveins: $x0, $lr
+    BLR killed renamable $lr, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+    $w0 = nsw ADDWri killed $w0, 1, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:           RS_cannot_find_available_regs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; In the rare case when no free temporary register is available for the
+    ; propagate taint-to-sp operation, just put in a full speculation barrier
+    ; (dsb sy + isb) at the start of the basic block. Also, don't put masks on
+    ; instructions in the rest of the basic block: speculation in that basic
+    ; block is already blocked by the barrier, so no masking is needed.
+    ; CHECK-LABEL: RS_cannot_find_available_regs
+    ; CHECK:       dsb sy
+    ; CHECK-NEXT:  isb
+    ; CHECK-NEXT:  ldr x0, [x0]
+    ; The following 2 instructions come from propagating the taint encoded in
+    ; sp at function entry to x16. It turns out the taint info in x16 is not
+    ; used in this function, so those instructions could be optimized away. An
+    ; optimization for later if it turns out this situation occurs often enough.
+    ; CHECK-NEXT:  cmp sp, #0
+    ; CHECK-NEXT:  csetm x16, ne
+    ; CHECK-NEXT:  ret
+    liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr
+     $x0 = LDRXui killed $x0, 0
+     RET undef $lr, implicit $x0
+...
--- a/test/CodeGen/Mips/cconv/vector.ll
+++ b/test/CodeGen/Mips/cconv/vector.ll
@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL

 ; Test that vector types are passed through the integer register set whether or
 ; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 | FileCheck %s --check-prefix=N32
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 | FileCheck %s --check-prefix=O3N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -mips-jalr-reloc=false | FileCheck %s --check-prefix=N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N32

 ; Test that PIC calls use the $25 register. This is an ABI requirement.

--- a/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6

 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6

-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6

 define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: sdiv_i1:
--- a/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6

 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6

-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6

 define signext i1 @srem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: srem_i1:
--- a/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6

 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R2
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6

-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6

 define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
 ; GP32-LABEL: udiv_i1:
--- a/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6

 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6

-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6

 define signext i1 @urem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: urem_i1:
--- a/test/CodeGen/Mips/long-call-attr.ll
+++ b/test/CodeGen/Mips/long-call-attr.ll
@ -1,11 +1,11 @@
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s

 declare void @far() #0

--- a/test/CodeGen/Mips/long-call-mcount.ll
+++ b/test/CodeGen/Mips/long-call-mcount.ll
@ -1,8 +1,8 @@
 ; Check call to mcount in case of long/short call options.
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,LONG %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,LONG %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,SHORT %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,SHORT %s

 ; Function Attrs: noinline nounwind optnone
 define void @foo() #0 {
--- a/test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ b/test/CodeGen/Mips/msa/f16-llvm-ir.ll
@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64

 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64


--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic \
+; RUN:     -mips-jalr-reloc=false < %s | FileCheck %s

 %0 = type { i8, i16, i32, i64, double, i32, [4 x i8] }
 %struct.S1 = type { i8, i16, i32, i64, double, i32 }
--- a/test/CodeGen/Mips/reloc-jalr.ll
+++ b/test/CodeGen/Mips/reloc-jalr.ll
@ -0,0 +1,154 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6,TAILCALL-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6,TAILCALL-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM,TAILCALL-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -mips-tail-calls=1 \
+; RUN:     -O2 -relocation-model=static < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+define internal void @foo() noinline {
+entry:
+  ret void
+}
+
+define void @checkCall() {
+entry:
+; ALL-LABEL: checkCall:
+  call void @foo()
+;	JALR-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R2-NEXT: [[TMPLABEL]]:
+;	JALR-32R2-NEXT: 	jalr	$25
+
+;	JALR-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R2-NEXT: [[TMPLABEL]]:
+;	JALR-64R2-NEXT: 	jalr	$25
+
+;	JALR-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; JALR-MM-NEXT: [[TMPLABEL]]:
+;	JALR-MM-NEXT: 	jalr	$25
+
+;	JALR-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R6-NEXT: [[TMPLABEL]]:
+;	JALR-32R6-NEXT: 	jalrc	$25
+
+;	JALR-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R6-NEXT: [[TMPLABEL]]:
+;	JALR-64R6-NEXT: 	jalrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+ ret void
+}
+
+define void @checkTailCall() {
+entry:
+; ALL-LABEL: checkTailCall:
+  tail call void @foo()
+;	TAILCALL-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R2-NEXT: 	jr	$25
+
+;	TAILCALL-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R2-NEXT: 	jr	$25
+
+;	TAILCALL-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; TAILCALL-MM-NEXT: [[TMPLABEL]]:
+;	TAILCALL-MM-NEXT: 	jrc	$25
+
+;	TAILCALL-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R6-NEXT: 	jrc	$25
+
+;	TAILCALL-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R6-NEXT: 	jrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+  ret void
+}
--- a/test/CodeGen/Mips/shrink-wrapping.ll
+++ b/test/CodeGen/Mips/shrink-wrapping.ll
@ -9,11 +9,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-STATIC

 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-PIC

 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-PIC

 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
@ -25,11 +25,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-STATIC

 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-64-PIC

 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-PIC

 declare void @f(i32 signext)
--- a/test/CodeGen/X86/debug-loclists.ll
+++ b/test/CodeGen/X86/debug-loclists.ll
@ -11,7 +11,7 @@
 ; CHECK-NEXT:               DW_AT_type [DW_FORM_ref4]     (cu + 0x0040 => {0x00000040} "A")

 ; CHECK:      .debug_loclists contents:
-; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000017, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000015, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 ; CHECK-NEXT: 0x00000000:
 ; CHECK-NEXT:  [0x0000000000000000, 0x0000000000000004): DW_OP_breg5 RDI+0
 ; CHECK-NEXT:  [0x0000000000000004, 0x0000000000000012): DW_OP_breg3 RBX+0
@ -32,13 +32,13 @@
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Lfunc_begin0-.Lfunc_begin0  # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 117                             # DW_OP_breg5
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp1-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 115                             # DW_OP_breg3
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 0                               # DW_LLE_end_of_list
--- a/test/CodeGen/X86/discriminate-mem-ops.ll
+++ b/test/CodeGen/X86/discriminate-mem-ops.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -x86-discriminate-memops  < %s | FileCheck %s
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
--- a/test/CodeGen/X86/insert-prefetch-inline.ll
+++ b/test/CodeGen/X86/insert-prefetch-inline.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
 ;
 ; Verify we can insert prefetch instructions in code belonging to inlined
 ; functions.
--- a/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
 ; ModuleID = 'prefetch.cc'
 source_filename = "prefetch.cc"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--- a/test/CodeGen/X86/insert-prefetch.ll
+++ b/test/CodeGen/X86/insert-prefetch.ll
@ -1,5 +1,5 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
--- a/test/DebugInfo/COFF/types-empty-member-fn.ll
+++ b/test/DebugInfo/COFF/types-empty-member-fn.ll
@ -0,0 +1,72 @@
+; RUN: llc < %s -filetype=obj | llvm-readobj - -codeview | FileCheck %s
+
+; ModuleID = 'foo.3a1fbbbh-cgu.0'
+source_filename = "foo.3a1fbbbh-cgu.0"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Rust source to regenerate:
+; $ cat foo.rs
+; pub struct Foo;
+; impl Foo {
+;     pub fn foo() {}
+; }
+; $ rustc foo.rs --crate-type lib -Cdebuginfo=1 --emit=llvm-ir
+
+; CHECK:      CodeViewTypes [
+; CHECK:        MemberFunction (0x1006) {
+; CHECK-NEXT:     TypeLeafKind: LF_MFUNCTION (0x1009)
+; CHECK-NEXT:     ReturnType: void (0x3)
+; CHECK-NEXT:     ClassType: foo::Foo (0x1000)
+; CHECK-NEXT:     ThisType: 0x0
+; CHECK-NEXT:     CallingConvention: NearC (0x0)
+; CHECK-NEXT:     FunctionOptions [ (0x0)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     NumParameters: 0
+; CHECK-NEXT:     ArgListType: () (0x1005)
+; CHECK-NEXT:     ThisAdjustment: 0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   MemberFuncId (0x1007) {
+; CHECK-NEXT:     TypeLeafKind: LF_MFUNC_ID (0x1602)
+; CHECK-NEXT:     ClassType: foo::Foo (0x1000)
+; CHECK-NEXT:     FunctionType: void foo::Foo::() (0x1006)
+; CHECK-NEXT:     Name: foo
+; CHECK-NEXT:   }
+; CHECK:      CodeViewDebugInfo [
+; CHECK:        FunctionLineTable [
+; CHECK-NEXT:     LinkageName: _ZN3foo3Foo3foo17hc557c2121772885bE
+; CHECK-NEXT:     Flags: 0x0
+; CHECK-NEXT:     CodeSize: 0x1
+; CHECK-NEXT:     FilenameSegment [
+; CHECK-NEXT:       Filename: D:\rust\foo.rs (0x0)
+; CHECK-NEXT:       +0x0 [
+; CHECK-NEXT:         LineNumberStart: 3
+; CHECK-NEXT:         LineNumberEndDelta: 0
+; CHECK-NEXT:         IsStatement: No
+; CHECK-NEXT:       ]
+; CHECK-NEXT:     ]
+; CHECK-NEXT:   ]
+
+; foo::Foo::foo
+; Function Attrs: uwtable
+define void @_ZN3foo3Foo3foo17hc557c2121772885bE() unnamed_addr #0 !dbg !5 {
+start:
+  ret void, !dbg !10
+}
+
+attributes #0 = { uwtable "target-cpu"="x86-64" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, producer: "clang LLVM (rustc version 1.33.0-nightly (8b0f0156e 2019-01-22))", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "foo.rs", directory: "D:\5Crust")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_ZN3foo3Foo3foo17hc557c2121772885bE", scope: !6, file: !1, line: 3, type: !9, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, templateParams: !2, retainedNodes: !2)
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", scope: !8, file: !7, align: 8, elements: !2, templateParams: !2, identifier: "5105d9fe1a2a3c68518268151b672274")
+!7 = !DIFile(filename: "<unknown>", directory: "")
+!8 = !DINamespace(name: "foo", scope: null)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 3, scope: !5)
--- a/test/DebugInfo/Mips/dwarfdump-tls.ll
+++ b/test/DebugInfo/Mips/dwarfdump-tls.ll
@ -1,12 +1,34 @@
-; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj -o=%t-32.o < %s
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-32.o < %s
 ; RUN: llvm-dwarfdump %t-32.o 2>&1 | FileCheck %s
-; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj -o=%t-64.o < %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-64.o < %s
 ; RUN: llvm-dwarfdump %t-64.o 2>&1 | FileCheck %s

+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM32 %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM64 %s
+
@x = thread_local global i32 5, align 4, !dbg !0

 ; CHECK-NOT: error: failed to compute relocation: R_MIPS_TLS_DTPREL

+; CHECK:      DW_AT_name      ("x")
+; CHECK-NEXT: DW_AT_type      (0x00000025 "int")
+; CHECK-NEXT: DW_AT_external  (true)
+; CHECK-NEXT: DW_AT_decl_file (0x01)
+; CHECK-NEXT: DW_AT_decl_line (1)
+; CHECK-NEXT: DW_AT_location  (DW_OP_GNU_const_index 0x0, {{DW_OP_GNU_push_tls_address|DW_OP_form_tls_address}})
+
+; ASM32:              .section        .debug_addr
+; ASM32-NEXT: $addr_table_base0:
+; ASM32-NEXT:         .4byte  x+32768
+
+; ASM64:              .section        .debug_addr
+; ASM64-NEXT: .Laddr_table_base0:
+; ASM64-NEXT:         .8byte  x+32768
+
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!7, !8}

--- a/test/DebugInfo/X86/dwarfdump-debug-loclists.test
+++ b/test/DebugInfo/X86/dwarfdump-debug-loclists.test
@ -9,7 +9,7 @@
 # CHECK-NEXT:    [0x0000000000000700, 0x0000000000000710): DW_OP_breg5 RDI+0

 # CHECK:      .debug_loclists contents:
-# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002c, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT: 0x00000000:
 # CHECK-NEXT:   [0x0000000000000000, 0x0000000000000010): DW_OP_breg5 RDI+0
 # CHECK-NEXT:   [0x0000000000000530, 0x0000000000000540): DW_OP_breg6 RBP-8, DW_OP_deref
@ -37,7 +37,7 @@
  .byte  4                       # DW_LLE_offset_pair
  .uleb128 0x0                   #   starting offset
  .uleb128 0x10                  #   ending offset
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
  .byte  117                     # DW_OP_breg5
  .byte  0                       # 0
  
@ -47,7 +47,7 @@
  .byte  4                       # DW_LLE_offset_pair
  .uleb128 0x30                  #   starting offset
  .uleb128 0x40                  #   ending offset
-  .short  3                      # Loc expr size
+  .byte  3                       # Loc expr size
  .byte  118                     # DW_OP_breg6
  .byte  120                     # -8
  .byte  6                       # DW_OP_deref
@ -55,7 +55,7 @@
  .byte  8                       # DW_LLE_start_length
  .quad  0x700                   # Some address
  .uleb128 0x10                  #   length
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
  .byte  117                     # DW_OP_breg5
  .byte  0                       # 0
  
--- a/test/Transforms/FunctionImport/Inputs/comdat.ll
+++ b/test/Transforms/FunctionImport/Inputs/comdat.ll
@ -0,0 +1,10 @@
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @main() {
+entry:
+  call i8* @lwt_fun()
+  ret void
+}
+
+declare i8* @lwt_fun()
--- a/test/Transforms/FunctionImport/comdat.ll
+++ b/test/Transforms/FunctionImport/comdat.ll
@ -0,0 +1,32 @@
+; Test to ensure that comdat is renamed consistently when comdat leader is
+; promoted and renamed due to an import. Required by COFF.
+
+; REQUIRES: x86-registered-target
+
+; RUN: opt -thinlto-bc -o %t1.bc %s
+; RUN: opt -thinlto-bc -o %t2.bc %S/Inputs/comdat.ll
+; RUN: llvm-lto2 run -save-temps -o %t3 %t1.bc %t2.bc \
+; RUN:          -r %t1.bc,lwt_fun,plx \
+; RUN:          -r %t2.bc,main,plx \
+; RUN:          -r %t2.bc,lwt_fun,
+; RUN: llvm-dis -o - %t3.1.3.import.bc | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+; CHECK: $lwt.llvm.[[HASH:[0-9]+]] = comdat any
+$lwt = comdat any
+
+; CHECK: @lwt_aliasee = private unnamed_addr global {{.*}}, comdat($lwt.llvm.[[HASH]])
+@lwt_aliasee = private unnamed_addr global [1 x i8*] [i8* null], comdat($lwt)
+
+; CHECK: @lwt.llvm.[[HASH]] = hidden unnamed_addr alias
+@lwt = internal unnamed_addr alias [1 x i8*], [1 x i8*]* @lwt_aliasee
+
+; The function below should get imported into the other module, resulting in
+; @lwt being promoted and renamed.
+define i8* @lwt_fun() {
+  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* @lwt, i32 0, i32 0
+  %2 = load i8*, i8** %1
+  ret i8* %2
+}
--- a/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
+++ b/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
@ -0,0 +1,33 @@
+; RUN: opt -transform-warning -disable-output < %s 2>&1 | FileCheck -allow-empty %s
+;
+; llvm.org/PR40546
+; Do not warn about leftover llvm.loop.vectorize.enable for already
+; vectorized loops.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i32 %n) {
+entry:
+  %cmp = icmp eq i32 %n, 0
+  br i1 %cmp, label %simd.if.end, label %omp.inner.for.body.preheader
+
+omp.inner.for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %omp.inner.for.body
+
+omp.inner.for.body:
+  %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !0
+
+simd.if.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.isvectorized"}
+
+
+; CHECK-NOT: loop not vectorized
--- a/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
+++ b/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
@ -0,0 +1,95 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -transform-warning -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
+
+; This test is a copy of no_switch.ll, with the "llvm.loop.vectorize.enable" metadata set to false.
+; It tests that vectorization is explicitly disabled and no warnings are emitted.
+
+; CHECK-NOT: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; NOANALYSIS-NOT: remark: {{.*}}
+; NOANALYSIS-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly disabled
+; MOREINFO-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+entry:
+  %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
+  br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !14
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  switch i32 %0, label %for.inc [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb3
+  ], !dbg !14
+
+sw.bb:                                            ; preds = %for.body
+  %1 = trunc i64 %indvars.iv to i32, !dbg !20
+  %mul = shl nsw i32 %1, 1, !dbg !20
+  br label %for.inc, !dbg !22
+
+sw.bb3:                                           ; preds = %for.body
+  %2 = trunc i64 %indvars.iv to i32, !dbg !23
+  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  br label %for.inc, !dbg !23
+
+for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
+  %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
+
+for.end.loopexit:                                 ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0"}
+!10 = !DILocation(line: 3, column: 8, scope: !11)
+!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!12 = !{!12, !13, !13}
+!13 = !{!"llvm.loop.vectorize.enable", i1 false}
+!14 = !DILocation(line: 4, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !11)
+!16 = !{!17, !17, i64 0}
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"omnipotent char", !19, i64 0}
+!19 = !{!"Simple C/C++ TBAA"}
+!20 = !DILocation(line: 6, column: 7, scope: !21)
+!21 = distinct !DILexicalBlock(line: 4, column: 18, file: !1, scope: !15)
+!22 = !DILocation(line: 7, column: 5, scope: !21)
+!23 = !DILocation(line: 9, column: 7, scope: !21)
+!24 = !DILocation(line: 14, column: 1, scope: !4)
--- a/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
+++ b/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
@ -6,7 +6,7 @@
 # the final version which uses ULEB128 and not the U32.

 # CHECK:         .debug_loclists contents:
-# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000e, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT:    0x00000000:
 # CHECK-NEXT:    Addr idx 1 (w/ length 16): DW_OP_reg5 RDI

@ -21,7 +21,7 @@
 .byte 3          # DW_LLE_startx_length
 .byte 0x01       # Index
 .uleb128 0x10    # Length
- .short 1         # Loc expr size
+ .byte 1          # Loc expr size
 .byte 85         # DW_OP_reg5
 .byte 0          # DW_LLE_end_of_list
 .Ldebug_loclist_table_end0: